Add CDH plugin to Sahara

Features:
* cluster provisioning
* scaling
* edp
* validation
* swift support

TODO (in other CRs):
* unit tests
* integration tests
* data locality

Partially implements: blueprint cdh-plugin

Change-Id: Ie231c434d61ba9a379a6ee2fd0f0bf2af21ce44d
This commit is contained in:
Sergey Reshetnyak 2014-07-16 17:54:32 +04:00
parent 5aa5b5d17f
commit 9b0d805cc2
30 changed files with 5925 additions and 0 deletions

View File

View File

@ -0,0 +1,195 @@
# Copyright (c) 2014 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
# cm_api client is not present in OS requirements
try:
from cm_api import api_client
from cm_api.endpoints import services
except ImportError:
api_client = None
services = None
from sahara.i18n import _
from sahara.plugins.cdh import utils as pu
from sahara.plugins.general import exceptions as ex
# Default credentials created by cloudera-scm-server on first start.
CM_DEFAULT_USERNAME = 'admin'
CM_DEFAULT_PASSWD = 'admin'

# Names under which the plugin registers services in Cloudera Manager.
HDFS_SERVICE_NAME = 'hdfs01'
YARN_SERVICE_NAME = 'yarn01'
OOZIE_SERVICE_NAME = 'oozie01'
def have_cm_api_libs():
    """Return True when the optional cm_api client libraries are importable.

    cm_api is not in the OpenStack requirements, so the module-level imports
    may have failed and left ``api_client``/``services`` set to None.
    """
    # Previously returned the module object itself (or None); callers only
    # use the result in boolean context, so returning a real bool is
    # backward compatible and clearer.
    return api_client is not None and services is not None
def cloudera_cmd(f):
    """Decorator for generators that yield Cloudera Manager commands.

    The wrapped generator is drained: each yielded ApiCommand is waited on,
    and any failed command aborts provisioning with HadoopProvisionError.
    """
    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        for command in f(*args, **kwargs):
            outcome = command.wait()
            if outcome.success:
                continue
            raise ex.HadoopProvisionError(outcome.resultMessage)
    return wrapper
def get_api_client(cluster):
    """Build an ApiResource that talks to the cluster's Cloudera Manager."""
    manager_node = pu.get_manager(cluster)
    return api_client.ApiResource(manager_node.management_ip,
                                  username=CM_DEFAULT_USERNAME,
                                  password=CM_DEFAULT_PASSWD)
def get_cloudera_cluster(cluster):
    """Return the CM cluster object registered under the Sahara cluster name."""
    return get_api_client(cluster).get_cluster(cluster.name)
@cloudera_cmd
def start_instances(cluster):
    # Start the whole CM cluster; cloudera_cmd waits for completion.
    cm_cluster = get_cloudera_cluster(cluster)
    yield cm_cluster.start()
def delete_instances(cluster, instances):
    """Unregister the given Sahara instances from Cloudera Manager."""
    api = get_api_client(cluster)
    doomed = set(inst.fqdn() for inst in instances)
    # CM reports hosts by FQDN; delete every host that matches an instance.
    for host in api.get_all_hosts(view='full'):
        if host.hostname in doomed:
            api.delete_host(host.hostId)
def get_service(process, cluster=None, instance=None):
    """Resolve the CM service object that owns *process*.

    Exactly one of *cluster* / *instance* must be provided.
    :raises ValueError: for unknown processes or missing arguments.
    """
    if cluster:
        cm_cluster = get_cloudera_cluster(cluster)
    elif instance:
        cm_cluster = get_cloudera_cluster(instance.node_group.cluster)
    else:
        raise ValueError(_("'cluster' or 'instance' argument missed"))

    # Map each supported process to the service it belongs to.
    process_map = {
        'NAMENODE': HDFS_SERVICE_NAME,
        'DATANODE': HDFS_SERVICE_NAME,
        'SECONDARYNAMENODE': HDFS_SERVICE_NAME,
        'RESOURCEMANAGER': YARN_SERVICE_NAME,
        'NODEMANAGER': YARN_SERVICE_NAME,
        'JOBHISTORY': YARN_SERVICE_NAME,
        'OOZIE_SERVER': OOZIE_SERVICE_NAME,
    }
    if process not in process_map:
        raise ValueError(
            _("Process %(process)s is not supported by CDH plugin") %
            {'process': process})
    return cm_cluster.get_service(process_map[process])
def decomission_nodes(cluster, process, role_names):
    """Decommission the given roles and remove them from their service.

    NOTE: the misspelled name ("decomission") is kept for API compatibility
    with existing callers.
    """
    service = get_service(process, cluster)
    service.decommission(*role_names).wait()
    for name in role_names:
        service.delete_role(name)
@cloudera_cmd
def refresh_nodes(cluster, process, service_name):
    # Refresh every role of type *process* within *service_name*;
    # each refresh command is awaited by the cloudera_cmd decorator.
    cm_cluster = get_cloudera_cluster(cluster)
    service = cm_cluster.get_service(service_name)
    nds = [n.name for n in service.get_roles_by_type(process)]
    for nd in nds:
        for st in service.refresh(nd):
            yield st
@cloudera_cmd
def deploy_configs(cluster):
    # Push client configuration to all cluster hosts.
    cm_cluster = get_cloudera_cluster(cluster)
    yield cm_cluster.deploy_client_config()
@cloudera_cmd
def update_configs(instance):
    # Re-deploy client configs for every process hosted on this instance.
    for process in instance.node_group.node_processes:
        service = get_service(process, instance=instance)
        yield service.deploy_client_config(get_role_name(instance, process))
def get_role_name(instance, service):
    """Build the CM role name for *service* running on *instance*.

    Role names must match "[_A-Za-z][-_A-Za-z0-9]{0,63}", hence the
    '-' -> '_' substitution in the hostname. Unknown processes fall back
    to their own name as prefix.
    """
    abbreviations = {
        'NAMENODE': 'NN',
        'DATANODE': 'DN',
        'SECONDARYNAMENODE': 'SNN',
        'RESOURCEMANAGER': 'RM',
        'NODEMANAGER': 'NM',
        'JOBHISTORY': 'JS',
        'OOZIE_SERVER': 'OS',
        'SERVICEMONITOR': 'SM',
        'HOSTMONITOR': 'HM',
        'EVENTSERVER': 'ES',
        'ALERTPUBLISHER': 'AP'
    }
    prefix = abbreviations.get(service, service)
    host_part = instance.hostname().replace('-', '_')
    return '%s_%s' % (prefix, host_part)
def create_mgmt_service(cluster):
    """Create and start the Cloudera Management Service on the manager node."""
    api = get_api_client(cluster)
    cm = api.get_cloudera_manager()

    manager = pu.get_manager(cluster)
    hostname = manager.fqdn()

    # All monitoring roles are colocated on the manager host.
    setup_info = services.ApiServiceSetupInfo()
    for proc in ('SERVICEMONITOR', 'HOSTMONITOR',
                 'EVENTSERVER', 'ALERTPUBLISHER'):
        setup_info.add_role_info(get_role_name(manager, proc), proc, hostname)

    cm.create_mgmt_service(setup_info)
    cm.hosts_start_roles([hostname])
@cloudera_cmd
def format_namenode(hdfs_service):
    # Format HDFS via each NAMENODE role; format_hdfs returns a list of
    # commands, only the first is relevant per role.
    for nn in hdfs_service.get_roles_by_type('NAMENODE'):
        yield hdfs_service.format_hdfs(nn.name)[0]


@cloudera_cmd
def start_service(service):
    # Start an arbitrary CM service; cloudera_cmd waits for completion.
    yield service.start()


@cloudera_cmd
def start_roles(service, role_names):
    # NOTE(review): call sites pass a single role-name string; the
    # star-unpack below would spread its characters unless callers pass a
    # sequence -- likely `def start_roles(service, *role_names)` was
    # intended. Verify against callers.
    for role in service.start_roles(*role_names):
        yield role


@cloudera_cmd
def create_yarn_job_history_dir(yarn_service):
    # Create the YARN job-history directory in HDFS.
    yield yarn_service.create_yarn_job_history_dir()


@cloudera_cmd
def create_oozie_db(oozie_service):
    # Initialize the Oozie database.
    yield oozie_service.create_oozie_db()


@cloudera_cmd
def install_oozie_sharelib(oozie_service):
    # Install the Oozie ShareLib into HDFS.
    yield oozie_service.install_oozie_sharelib()

View File

@ -0,0 +1,107 @@
# Copyright (c) 2014 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sahara.i18n import _
from sahara.plugins.general import exceptions as ex
def _root(remote, cmd, **kwargs):
    # Run a shell command on the instance with root privileges.
    return remote.execute_command(cmd, run_as_root=True, **kwargs)
def _get_os_distrib(remote):
return remote.execute_command('lsb_release -is')[1].strip().lower()
def is_centos_os(remote):
    # True when lsb_release reports CentOS on the instance.
    return _get_os_distrib(remote) == 'centos'


def is_ubuntu_os(remote):
    # True when lsb_release reports Ubuntu on the instance.
    return _get_os_distrib(remote) == 'ubuntu'
def is_pre_installed_cdh(remote):
    """Check whether the CM server init script already exists on the image."""
    code, _out = remote.execute_command(
        'ls /etc/init.d/cloudera-scm-server', raise_when_error=False)
    return code == 0
def stop_resourcemanager(remote):
    # Stop the auto-started YARN ResourceManager (CM will manage it instead).
    _root(remote, 'service hadoop-yarn-resourcemanager stop')


def stop_nodemanager(remote):
    # Stop the auto-started YARN NodeManager.
    _root(remote, 'service hadoop-yarn-nodemanager stop')


def stop_historyserver(remote):
    # Stop the auto-started MapReduce JobHistory server.
    _root(remote, 'service hadoop-mapreduce-historyserver stop')


def start_cloudera_db(remote):
    # Start the embedded CM database.
    _root(remote, 'service cloudera-scm-server-db start')


def start_manager(remote):
    # Start the Cloudera Manager server.
    _root(remote, 'service cloudera-scm-server start')
def configure_agent(remote, manager_address):
    # Point the CM agent at the manager node by rewriting its config.
    remote.replace_remote_string('/etc/cloudera-scm-agent/config.ini',
                                 'server_host=.*',
                                 'server_host=%s' % manager_address)


def start_agent(remote):
    # Start the CM agent service.
    _root(remote, 'service cloudera-scm-agent start')
def install_packages(remote, packages, timeout=1800):
    """Install *packages* with the distro's package manager.

    :param remote: instance remote
    :param packages: iterable of package names
    :param timeout: command timeout in seconds
    :raises ex.HadoopProvisionError: when the image OS is unsupported
    """
    distrib = _get_os_distrib(remote)
    if distrib == 'ubuntu':
        cmd = 'apt-get install -y %s'
    elif distrib == 'centos':
        # BUG FIX: without -y yum waits for interactive confirmation and
        # the provisioning command hangs until the timeout expires.
        cmd = 'yum install -y %s'
    else:
        raise ex.HadoopProvisionError(
            _("OS on image is not supported by CDH plugin"))

    cmd = cmd % ' '.join(packages)
    _root(remote, cmd, timeout=timeout)
def update_repository(remote):
    # Only apt needs an explicit index refresh; yum fetches metadata lazily.
    if is_ubuntu_os(remote):
        _root(remote, 'apt-get update')


def push_remote_file(remote, src, dst):
    # Download *src* directly on the instance (data never passes through
    # the Sahara controller).
    cmd = 'curl %s -o %s' % (src, dst)
    _root(remote, cmd)


def add_ubuntu_repository(r, repo_list_url, repo_name):
    # Install an apt source list for the given repository.
    push_remote_file(r, repo_list_url,
                     '/etc/apt/sources.list.d/%s.list' % repo_name)


def add_apt_key(remote, key_url):
    # Trust the repository signing key.
    cmd = 'wget -qO - %s | apt-key add -' % key_url
    _root(remote, cmd)


def add_centos_repository(r, repo_list_url, repo_name):
    # Install a yum .repo file for the given repository.
    push_remote_file(r, repo_list_url, '/etc/yum.repos.d/%s.repo' % repo_name)

View File

@ -0,0 +1,164 @@
# Copyright (c) 2014 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from sahara.plugins import provisioning as p
from sahara.utils import files as f
# Default package repositories used when the user does not override them
# via the cluster configs below.
DEFAULT_CDH5_UBUNTU_REPO_LIST_URL = ('http://archive.cloudera.com/cdh5/ubuntu'
                                     '/precise/amd64/cdh/cloudera.list')

DEFAULT_CDH5_UBUNTU_REPO_KEY_URL = ('http://archive.cloudera.com/cdh5/ubuntu'
                                    '/precise/amd64/cdh/archive.key')

DEFAULT_CM5_UBUNTU_REPO_LIST_URL = ('http://archive.cloudera.com/cm5/ubuntu'
                                    '/precise/amd64/cm/cloudera.list')

DEFAULT_CM5_UBUNTU_REPO_KEY_URL = ('http://archive.cloudera.com/cm5/ubuntu'
                                   '/precise/amd64/cm/archive.key')

DEFAULT_CDH5_CENTOS_REPO_LIST_URL = ('http://archive.cloudera.com/cdh5/redhat'
                                     '/6/x86_64/cdh/cloudera-cdh5.repo')

DEFAULT_CM5_CENTOS_REPO_LIST_URL = ('http://archive.cloudera.com/cm5/redhat'
                                    '/6/x86_64/cm/cloudera-manager.repo')

# Hadoop-Swift integration jar, downloaded onto each instance when Swift
# support is enabled.
DEFAULT_SWIFT_LIB_URL = ('https://repository.cloudera.com/artifactory/repo/org'
                         '/apache/hadoop/hadoop-openstack/2.3.0-cdh5.1.0'
                         '/hadoop-openstack-2.3.0-cdh5.1.0.jar')

# User-visible cluster-level configs.
CDH5_REPO_URL = p.Config(
    'CDH5 repo list URL', 'general', 'cluster', priority=1,
    default_value="")

CDH5_REPO_KEY_URL = p.Config(
    'CDH5 repo key URL (for debian-based only)', 'general', 'cluster',
    priority=1, default_value="")

CM5_REPO_URL = p.Config(
    'CM5 repo list URL', 'general', 'cluster', priority=1,
    default_value="")

CM5_REPO_KEY_URL = p.Config(
    'CM5 repo key URL (for debian-based only)', 'general', 'cluster',
    priority=1, default_value="")

ENABLE_SWIFT = p.Config('Enable Swift', 'general', 'cluster',
                        config_type='bool', priority=1,
                        default_value=True)

SWIFT_LIB_URL = p.Config(
    'Hadoop OpenStack library URL', 'general', 'cluster', priority=1,
    default_value=DEFAULT_SWIFT_LIB_URL,
    description=("Library that adds Swift support to CDH. The file will be "
                 "downloaded from VM."))
def _get_cluster_plugin_configs():
    # Cluster-wide (non node-group) plugin configs.
    return [CDH5_REPO_URL, CDH5_REPO_KEY_URL, CM5_REPO_URL, CM5_REPO_KEY_URL,
            ENABLE_SWIFT, SWIFT_LIB_URL]


# ng wide configs
def _load_json(path_to_file):
    # Load a JSON resource file shipped inside the plugin package.
    data = f.get_file_text(path_to_file)
    return json.loads(data)
# Raw per-service / per-role config descriptions extracted from Cloudera
# Manager (see resources/cdh_config.py for the generator script).
path_to_config = 'plugins/cdh/resources/'
hdfs_confs = _load_json(path_to_config + 'hdfs-service.json')
namenode_confs = _load_json(path_to_config + 'hdfs-namenode.json')
datanode_confs = _load_json(path_to_config + 'hdfs-datanode.json')
secnamenode_confs = _load_json(path_to_config + 'hdfs-secondarynamenode.json')
yarn_confs = _load_json(path_to_config + 'yarn-service.json')
resourcemanager_confs = _load_json(
    path_to_config + 'yarn-resourcemanager.json')
nodemanager_confs = _load_json(path_to_config + 'yarn-nodemanager.json')
jobhistory_confs = _load_json(path_to_config + 'yarn-jobhistory.json')
oozie_service_confs = _load_json(path_to_config + 'oozie-service.json')
oozie_role_confs = _load_json(path_to_config + 'oozie-oozie.json')
# Names of configs that should be shown with high priority in the UI.
priority_one_confs = _load_json(path_to_config + 'priority-one-confs.json')
def _prepare_value(value):
if not value:
return ""
return value.replace('\n', ' ')
def _init_configs(confs, app_target, scope):
    """Convert raw JSON config descriptions into p.Config objects."""
    result = []
    for conf in confs:
        # Configs listed in priority-one-confs.json are surfaced first.
        prio = 1 if conf['name'] in priority_one_confs else 2
        result.append(p.Config(conf['name'], app_target, scope,
                               priority=prio,
                               default_value=_prepare_value(conf['value']),
                               description=conf['desc'],
                               is_optional=True))
    return result
def _get_ng_plugin_configs():
    # Node-group scoped configs: service-level confs are exposed at cluster
    # scope, role-level confs at node scope.
    cfg = []
    cfg += _init_configs(hdfs_confs, 'HDFS', 'cluster')
    cfg += _init_configs(namenode_confs, 'NAMENODE', 'node')
    cfg += _init_configs(datanode_confs, 'DATANODE', 'node')
    cfg += _init_configs(secnamenode_confs, 'SECONDARYNAMENODE', 'node')
    cfg += _init_configs(yarn_confs, 'YARN', 'cluster')
    cfg += _init_configs(resourcemanager_confs, 'RESOURCEMANAGER', 'node')
    cfg += _init_configs(nodemanager_confs, 'NODEMANAGER', 'node')
    cfg += _init_configs(jobhistory_confs, 'JOBHISTORY', 'node')
    cfg += _init_configs(oozie_service_confs, 'OOZIE', 'cluster')
    cfg += _init_configs(oozie_role_confs, 'OOZIE', 'node')
    return cfg
def get_plugin_configs():
    """Return all exposed plugin configs (cluster-wide first, then NG)."""
    return _get_cluster_plugin_configs() + _get_ng_plugin_configs()
def _get_config_value(cluster, key):
return cluster.cluster_configs.get(
'general', {}).get(key.name, key.default_value)
# Convenience accessors for the cluster-level configs defined above.
def get_cdh5_repo_url(cluster):
    return _get_config_value(cluster, CDH5_REPO_URL)


def get_cdh5_key_url(cluster):
    return _get_config_value(cluster, CDH5_REPO_KEY_URL)


def get_cm5_repo_url(cluster):
    return _get_config_value(cluster, CM5_REPO_URL)


def get_cm5_key_url(cluster):
    return _get_config_value(cluster, CM5_REPO_KEY_URL)


def is_swift_enabled(cluster):
    return _get_config_value(cluster, ENABLE_SWIFT)


def get_swift_lib_url(cluster):
    return _get_config_value(cluster, SWIFT_LIB_URL)

View File

@ -0,0 +1,373 @@
# Copyright (c) 2014 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import telnetlib
import six
from sahara import context
from sahara.i18n import _
from sahara.i18n import _LI
from sahara.openstack.common import log as logging
from sahara.openstack.common import timeutils
from sahara.plugins.cdh import cloudera_utils as cu
from sahara.plugins.cdh import commands as cmd
from sahara.plugins.cdh import config_helper as c_helper
from sahara.plugins.cdh import utils as pu
from sahara.plugins.general import exceptions as ex
from sahara.plugins.general import utils as gu
from sahara.swift import swift_helper
from sahara.utils import xmlutils
# Cloudera Manager API endpoint port.
CM_API_PORT = 7180

CDH_VERSION = 'CDH5'

# CM service types (as opposed to the instance names in cloudera_utils).
HDFS_SERVICE_TYPE = 'HDFS'
YARN_SERVICE_TYPE = 'YARN'
OOZIE_SERVICE_TYPE = 'OOZIE'

PATH_TO_CORE_SITE_XML = '/etc/hadoop/conf/core-site.xml'
HADOOP_LIB_DIR = '/usr/lib/hadoop-mapreduce'

# Packages installed on every instance when the image does not ship a
# pre-installed CDH.
PACKAGES = [
    'cloudera-manager-agent',
    'cloudera-manager-daemons',
    'cloudera-manager-server',
    'cloudera-manager-server-db',
    'hadoop-hdfs-datanode',
    'hadoop-hdfs-namenode',
    'hadoop-hdfs-secondarynamenode',
    'hadoop-mapreduce',
    'hadoop-mapreduce-historyserver',
    'hadoop-yarn-nodemanager',
    'hadoop-yarn-resourcemanager',
    'oozie',
    'oracle-j2sdk1.7',
]

LOG = logging.getLogger(__name__)
def _merge_dicts(a, b):
    """Two-level merge of config dicts; per-service values from *b* win."""
    merged = {}
    for source in (a, b):
        for service, configs in six.iteritems(source):
            merged.setdefault(service, {}).update(configs)
    return merged
def _get_configs(service, cluster=None, node_group=None):
    """Collect the effective configs for *service*.

    Plugin defaults are merged with user-supplied node-group configs,
    storage-derived directory defaults, and user cluster configs.
    """
    def get_hadoop_dirs(mount_points, suffix):
        # One directory per attached storage volume, comma-separated.
        return ','.join([x + suffix for x in mount_points])

    # Cross-service wiring that is always required.
    all_confs = {
        'OOZIE': {
            'mapreduce_yarn_service': cu.YARN_SERVICE_NAME
        },
        'YARN': {
            'hdfs_service': cu.HDFS_SERVICE_NAME
        }
    }

    if node_group:
        paths = node_group.storage_paths()

        ng_default_confs = {
            'NAMENODE': {
                'dfs_name_dir_list': get_hadoop_dirs(paths, '/fs/nn')
            },
            'SECONDARYNAMENODE': {
                'fs_checkpoint_dir_list': get_hadoop_dirs(paths, '/fs/snn')
            },
            'DATANODE': {
                'dfs_data_dir_list': get_hadoop_dirs(paths, '/fs/dn')
            },
            'NODEMANAGER': {
                'yarn_nodemanager_local_dirs': get_hadoop_dirs(paths,
                                                               '/yarn/local')
            }
        }

        ng_user_confs = node_group.node_configs
        # NOTE(review): defaults are merged after user configs, so the
        # storage-derived directory lists take precedence over user values
        # -- presumably intentional (dirs must match attached volumes);
        # verify.
        all_confs = _merge_dicts(all_confs, ng_user_confs)
        all_confs = _merge_dicts(all_confs, ng_default_confs)

    if cluster:
        all_confs = _merge_dicts(all_confs, cluster.cluster_configs)

    return all_confs.get(service, {})
def configure_cluster(cluster):
    # Full first-time configuration: OS setup (unless the image ships CDH
    # pre-installed), CM agents/manager start, service creation and config.
    instances = gu.get_instances(cluster)

    if not cmd.is_pre_installed_cdh(pu.get_manager(cluster).remote()):
        _configure_os(instances)
        _install_packages(instances, PACKAGES)
        _post_install(instances)

    _start_cloudera_agents(instances)
    _start_cloudera_manager(cluster)

    _configure_manager(cluster)
    _create_services(cluster)
    _configure_services(cluster)
    _configure_instances(instances)
    cu.deploy_configs(cluster)
    if c_helper.is_swift_enabled(cluster):
        _configure_swift(instances)
def scale_cluster(cluster, instances):
    # Provision newly added instances and start their roles.
    if not cmd.is_pre_installed_cdh(pu.get_manager(cluster).remote()):
        _configure_os(instances)
        _install_packages(instances, PACKAGES)
        _post_install(instances)

    _start_cloudera_agents(instances)
    for instance in instances:
        _configure_instance(instance)
        cu.update_configs(instance)

        if 'DATANODE' in instance.node_group.node_processes:
            cu.refresh_nodes(cluster, 'DATANODE', cu.HDFS_SERVICE_NAME)

        _configure_swift_to_inst(instance)

        if 'DATANODE' in instance.node_group.node_processes:
            hdfs = cu.get_service('DATANODE', instance=instance)
            cu.start_roles(hdfs, cu.get_role_name(instance, 'DATANODE'))
        if 'NODEMANAGER' in instance.node_group.node_processes:
            yarn = cu.get_service('NODEMANAGER', instance=instance)
            cu.start_roles(yarn, cu.get_role_name(instance, 'NODEMANAGER'))
def decomission_cluster(cluster, instances):
    """Decommission DATANODE/NODEMANAGER roles of the removed instances,
    drop the hosts from CM and refresh the remaining nodes."""
    dns = [cu.get_role_name(i, 'DATANODE')
           for i in instances
           if 'DATANODE' in i.node_group.node_processes]
    nms = [cu.get_role_name(i, 'NODEMANAGER')
           for i in instances
           if 'NODEMANAGER' in i.node_group.node_processes]

    if dns:
        cu.decomission_nodes(cluster, 'DATANODE', dns)
    if nms:
        cu.decomission_nodes(cluster, 'NODEMANAGER', nms)

    cu.delete_instances(cluster, instances)

    cu.refresh_nodes(cluster, 'DATANODE', cu.HDFS_SERVICE_NAME)
    cu.refresh_nodes(cluster, 'NODEMANAGER', cu.YARN_SERVICE_NAME)
def _configure_os(instances):
    # Configure package repositories on all instances in parallel.
    with context.ThreadGroup() as tg:
        for inst in instances:
            tg.spawn('cdh-repo-conf-%s' % inst.instance_name,
                     _configure_repo_from_inst, inst)
def _configure_repo_from_inst(instance):
    # Set up the CDH and CM package repositories on one instance, falling
    # back to the Cloudera archive defaults when no URL is configured.
    LOG.debug("Configure repos from instance '%(instance)s'" % {
        'instance': instance.instance_name})
    cluster = instance.node_group.cluster

    cdh5_repo = c_helper.get_cdh5_repo_url(cluster)
    cdh5_key = c_helper.get_cdh5_key_url(cluster)
    cm5_repo = c_helper.get_cm5_repo_url(cluster)
    cm5_key = c_helper.get_cm5_key_url(cluster)

    with instance.remote() as r:
        if cmd.is_ubuntu_os(r):
            cdh5_repo = cdh5_repo or c_helper.DEFAULT_CDH5_UBUNTU_REPO_LIST_URL
            cdh5_key = cdh5_key or c_helper.DEFAULT_CDH5_UBUNTU_REPO_KEY_URL
            cm5_repo = cm5_repo or c_helper.DEFAULT_CM5_UBUNTU_REPO_LIST_URL
            cm5_key = cm5_key or c_helper.DEFAULT_CM5_UBUNTU_REPO_KEY_URL

            cmd.add_ubuntu_repository(r, cdh5_repo, 'cdh')
            cmd.add_apt_key(r, cdh5_key)
            cmd.add_ubuntu_repository(r, cm5_repo, 'cm')
            cmd.add_apt_key(r, cm5_key)
            cmd.update_repository(r)

        if cmd.is_centos_os(r):
            cdh5_repo = cdh5_repo or c_helper.DEFAULT_CDH5_CENTOS_REPO_LIST_URL
            cm5_repo = cm5_repo or c_helper.DEFAULT_CM5_CENTOS_REPO_LIST_URL

            cmd.add_centos_repository(r, cdh5_repo, 'cdh')
            cmd.add_centos_repository(r, cm5_repo, 'cm')
def _install_packages(instances, packages):
    # Install CDH/CM packages on all instances in parallel.
    with context.ThreadGroup() as tg:
        for i in instances:
            tg.spawn('cdh-inst-pkgs-%s' % i.instance_name,
                     _install_pkgs, i, packages)


def _install_pkgs(instance, packages):
    with instance.remote() as r:
        cmd.install_packages(r, packages)
def _post_install(instances):
    # Stop services auto-started by the packages; CM will manage them.
    with context.ThreadGroup() as tg:
        for i in instances:
            tg.spawn('cdh-post-inst-%s' % i.instance_name,
                     _stop_services, i)


def _stop_services(instance):
    with instance.remote() as r:
        cmd.stop_resourcemanager(r)
        cmd.stop_nodemanager(r)
        cmd.stop_historyserver(r)
def _start_cloudera_agents(instances):
    # Start CM agents on all instances in parallel.
    with context.ThreadGroup() as tg:
        for i in instances:
            tg.spawn('cdh-agent-start-%s' % i.instance_name,
                     _start_cloudera_agent, i)


def _start_cloudera_agent(instance):
    # Point the agent at the manager node, then start it.
    mng_hostname = pu.get_manager(instance.node_group.cluster).hostname()
    with instance.remote() as r:
        cmd.configure_agent(r, mng_hostname)
        cmd.start_agent(r)
def _start_cloudera_manager(cluster):
    # Start the CM server (and its embedded DB), then poll the API port
    # until it answers or the timeout expires.
    manager = pu.get_manager(cluster)
    with manager.remote() as r:
        cmd.start_cloudera_db(r)
        cmd.start_manager(r)

    timeout = 300
    LOG.debug("Waiting %(timeout)s seconds for Manager to start : " % {
        'timeout': timeout})
    s_time = timeutils.utcnow()
    while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
        try:
            # A successful TCP connect means the API is up.
            conn = telnetlib.Telnet(manager.management_ip, CM_API_PORT)
            conn.close()
            break
        except IOError:
            context.sleep(2)
    else:
        # while/else: the loop exhausted the timeout without a `break`.
        message = _("Cloudera Manager failed to start in %(timeout)s minutes "
                    "on node '%(node)s' of cluster '%(cluster)s'") % {
                        'timeout': timeout / 60,
                        'node': manager.management_ip,
                        'cluster': cluster.name}
        raise ex.HadoopProvisionError(message)

    LOG.info(_LI("Cloudera Manager has been started"))
def _create_services(cluster):
    # Register the CM cluster and its HDFS/YARN/Oozie services.
    api = cu.get_api_client(cluster)

    cm_cluster = api.create_cluster(cluster.name, CDH_VERSION)

    cm_cluster.create_service(cu.HDFS_SERVICE_NAME, HDFS_SERVICE_TYPE)
    cm_cluster.create_service(cu.YARN_SERVICE_NAME, YARN_SERVICE_TYPE)
    cm_cluster.create_service(cu.OOZIE_SERVICE_NAME, OOZIE_SERVICE_TYPE)
def _configure_services(cluster):
    """Apply service-level configs to HDFS, YARN and Oozie."""
    cm_cluster = cu.get_cloudera_cluster(cluster)

    service_pairs = (
        (cu.HDFS_SERVICE_NAME, HDFS_SERVICE_TYPE),
        (cu.YARN_SERVICE_NAME, YARN_SERVICE_TYPE),
        (cu.OOZIE_SERVICE_NAME, OOZIE_SERVICE_TYPE),
    )
    for service_name, service_type in service_pairs:
        service = cm_cluster.get_service(service_name)
        service.update_config(_get_configs(service_type, cluster=cluster))
def _configure_instances(instances):
    for inst in instances:
        _configure_instance(inst)


def _configure_instance(instance):
    # Create a CM role on this instance for each of its node processes.
    for process in instance.node_group.node_processes:
        _add_role(instance, process)
def _add_role(instance, process):
    """Create and configure the CM role for *process* on *instance*."""
    # MANAGER is not a CM role; nothing to create for it.
    if process in ['MANAGER']:
        return

    service = cu.get_service(process, instance=instance)
    new_role = service.create_role(cu.get_role_name(instance, process),
                                   process, instance.fqdn())
    new_role.update_config(
        _get_configs(process, node_group=instance.node_group))
def _configure_manager(cluster):
    # Set up the Cloudera Management Service (monitoring roles).
    cu.create_mgmt_service(cluster)


def _configure_swift(instances):
    # Install Swift support on all instances in parallel.
    with context.ThreadGroup() as tg:
        for i in instances:
            tg.spawn('cdh-swift-conf-%s' % i.instance_name,
                     _configure_swift_to_inst, i)
def _configure_swift_to_inst(instance):
    # Download the hadoop-openstack jar and inject the Swift properties
    # into core-site.xml on this instance.
    cluster = instance.node_group.cluster
    with instance.remote() as r:
        r.execute_command('sudo curl %s -o %s/hadoop-openstack.jar' % (
            c_helper.get_swift_lib_url(cluster), HADOOP_LIB_DIR))
        core_site = r.read_file_from(PATH_TO_CORE_SITE_XML)
        configs = xmlutils.parse_hadoop_xml_with_name_and_value(core_site)
        configs.extend(swift_helper.get_swift_configs())
        confs = dict((c['name'], c['value']) for c in configs)
        new_core_site = xmlutils.create_hadoop_xml(confs)
        r.write_file_to(PATH_TO_CORE_SITE_XML, new_core_site,
                        run_as_root=True)
def start_cluster(cluster):
    # Initial start: format HDFS first, then bring the services up in
    # dependency order (HDFS -> YARN -> Oozie).
    cm_cluster = cu.get_cloudera_cluster(cluster)

    hdfs = cm_cluster.get_service(cu.HDFS_SERVICE_NAME)
    cu.format_namenode(hdfs)
    cu.start_service(hdfs)

    yarn = cm_cluster.get_service(cu.YARN_SERVICE_NAME)
    cu.create_yarn_job_history_dir(yarn)
    cu.start_service(yarn)

    oozie = cm_cluster.get_service(cu.OOZIE_SERVICE_NAME)
    cu.create_oozie_db(oozie)
    cu.install_oozie_sharelib(oozie)
    cu.start_service(oozie)

View File

@ -0,0 +1,106 @@
# Copyright (c) 2014 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sahara import conductor
from sahara import context
from sahara.plugins.cdh import config_helper as c_helper
from sahara.plugins.cdh import deploy as dp
from sahara.plugins.cdh import utils as cu
from sahara.plugins.cdh import validation as vl
from sahara.plugins import provisioning as p
# Shortcut to the conductor API (deliberately shadows the imported module).
conductor = conductor.API
class CDHPluginProvider(p.ProvisioningPluginBase):
    """Provisioning plugin that deploys CDH clusters via Cloudera Manager."""

    def get_title(self):
        return "Cloudera Plugin"

    def get_description(self):
        # BUG FIX: a space was missing between the two concatenated string
        # literals ("...clusters withCloudera Manager...").
        return ("This plugin provides an ability to launch CDH clusters with "
                "Cloudera Manager management console.")

    def get_versions(self):
        return ['5']

    def get_node_processes(self, hadoop_version):
        # Node-group process names exposed to users, grouped by service.
        return {
            "CLOUDERA": ['MANAGER'],
            "HDFS": [],
            "NAMENODE": ['NAMENODE'],
            "DATANODE": ['DATANODE'],
            "SECONDARYNAMENODE": ['SECONDARYNAMENODE'],
            "YARN": [],
            "RESOURCEMANAGER": ['RESOURCEMANAGER'],
            "NODEMANAGER": ['NODEMANAGER'],
            "JOBHISTORY": ['JOBHISTORY'],
            "OOZIE": ['OOZIE_SERVER']
        }

    def get_configs(self, hadoop_version):
        return c_helper.get_plugin_configs()

    def configure_cluster(self, cluster):
        dp.configure_cluster(cluster)

    def start_cluster(self, cluster):
        dp.start_cluster(cluster)
        self._set_cluster_info(cluster)

    def validate(self, cluster):
        vl.validate_cluster_creating(cluster)

    def scale_cluster(self, cluster, instances):
        dp.scale_cluster(cluster, instances)

    def decommission_nodes(self, cluster, instances):
        dp.decomission_cluster(cluster, instances)

    def validate_scaling(self, cluster, existing, additional):
        vl.validate_existing_ng_scaling(cluster, existing)
        vl.validate_additional_ng_scaling(cluster, additional)

    def get_hdfs_user(self):
        return 'hdfs'

    def get_oozie_server(self, cluster):
        return cu.get_oozie(cluster)

    def get_oozie_server_uri(self, cluster):
        oozie_ip = cu.get_oozie(cluster).management_ip
        return 'http://%s:11000/oozie' % oozie_ip

    def get_name_node_uri(self, cluster):
        namenode_ip = cu.get_namenode(cluster).fqdn()
        return 'hdfs://%s:8020' % namenode_ip

    def get_resource_manager_uri(self, cluster):
        resourcemanager_ip = cu.get_resourcemanager(cluster).fqdn()
        return '%s:8032' % resourcemanager_ip

    def _set_cluster_info(self, cluster):
        # Expose the CM web UI and its default credentials in cluster info.
        mng = cu.get_manager(cluster)
        info = {
            'Cloudera Manager': {
                'Web UI': 'http://%s:7180' % mng.management_ip,
                'Username': 'admin',
                'Password': 'admin'
            }
        }
        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})

View File

@ -0,0 +1,90 @@
# Copyright (c) 2014 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from cm_api import api_client
# -- cm config --

# Connection settings for the running Cloudera Manager the configs are
# extracted from.
cm_address = 'localhost'
cm_port = 7180
cm_username = 'admin'
cm_password = 'admin'

# Service names as registered by the CDH plugin.
hdfs_service_name = 'hdfs01'
yarn_service_name = 'yarn01'
oozie_service_name = 'oozie01'
def get_cm_api():
    # Build an authenticated Cloudera Manager API client.
    return api_client.ApiResource(cm_address, server_port=cm_port,
                                  username=cm_username, password=cm_password)
def get_cluster(api):
    """Return the first (and only expected) cluster known to CM."""
    clusters = api.get_all_clusters()
    return clusters[0]
def process_service(service, service_name):
    # Dump one JSON file per role config group plus one for the service
    # itself (e.g. hdfs-namenode.json, hdfs-service.json).
    for role_cfgs in service.get_all_role_config_groups():
        role_cm_cfg = role_cfgs.get_config(view='full')
        role_cfg = parse_config(role_cm_cfg)
        # Display names look like "NameNode Default Group" -> "namenode".
        role_name = role_cfgs.displayName.split(' ')[0].lower()
        write_cfg(role_cfg, '%s-%s.json' % (service_name, role_name))

    service_cm_cfg = service.get_config(view='full')[0]
    service_cfg = parse_config(service_cm_cfg)
    write_cfg(service_cfg, '%s-service.json' % service_name)
def parse_config(config):
    """Convert a CM full-view config mapping into a list of plain dicts."""
    cfg = []
    # Iterate values directly: the mapping key duplicates value.name, and
    # dict.iteritems() does not exist on Python 3.
    for value in config.values():
        cfg.append({
            'name': value.name,
            'value': value.default,
            'display_name': value.displayName,
            'desc': value.description
        })
    return cfg
def write_cfg(cfg, file_name):
    """Dump *cfg* as pretty-printed, key-sorted JSON into *file_name*."""
    with open(file_name, 'w') as out:
        out.write(json.dumps(cfg, sort_keys=True, indent=4,
                             separators=(',', ': ')))
def main():
    # Extract full config views for HDFS, YARN and Oozie from a running
    # Cloudera Manager and write them out as plugin resource files.
    client = get_cm_api()
    cluster = get_cluster(client)

    hdfs = cluster.get_service(hdfs_service_name)
    process_service(hdfs, 'hdfs')
    yarn = cluster.get_service(yarn_service_name)
    process_service(yarn, 'yarn')
    oozie = cluster.get_service(oozie_service_name)
    process_service(oozie, 'oozie')


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,3 @@
#!/bin/bash
# Regenerate the CDH resource JSON files inside the project's tox venv.
# BUG FIX: "$@" (not $*) preserves arguments that contain whitespace.
tox -evenv -- python $(dirname $0)/cdh_config.py "$@"

View File

@ -0,0 +1,44 @@
[
{
"desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Java Configuration Options for Balancer",
"name": "balancer_java_opts",
"value": ""
},
{
"desc": "<p>This file contains the rules which govern how log messages are turned into events by the custom log4j appender that this role loads. It is in JSON format, and is composed of a list of rules. Every log message is evaluated against each of these rules in turn to decide whether or not to send an event for that message.</p><p>Each rule has some or all of the following fields:</p><ul><li><span class='code'>alert</span> - whether or not events generated from this rule should be promoted to alerts. A value of \"true\" will cause alerts to be generated. If not specified, the default is \"false\".</li><li><span class='code'>rate</span> <strong>(mandatory)</strong> - the maximum number of log messages matching this rule that may be sent as events every minute. If more than <tt>rate</tt> matching log messages are received in a single minute, the extra messages are ignored. If rate is less than 0, the number of messages per minute is unlimited.</li><li><span class='code'>periodminutes</span> - the number of minutes during which the publisher will only publish <tt>rate</tt> events or fewer. If not specified, the default is <strong>one minute</strong></li><li><span class='code'>threshold</span> - apply this rule only to messages with this log4j severity level or above. An example is \"WARN\" for warning level messages or higher.</li><li><span class='code'>content</span> - match only those messages whose contents match this regular expression.</li><li><span class='code'>exceptiontype</span> - match only those messages which are part of an exception message. The exception type must match this regular expression.</li></ul><br/><p>Example:<span class='code'>{\"alert\": false, \"rate\": 10, \"exceptiontype\": \"java.lang.StringIndexOutOfBoundsException\"}</span></p><p>This rule will send events to Cloudera Manager for every <span class='code'>StringIndexOutOfBoundsException</span>, up to a maximum of 10 every minute.</p>",
"display_name": "Rules to Extract Events from Log Files",
"name": "log_event_whitelist",
"value": "{\n \"version\": \"0\",\n \"rules\": [\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"FATAL\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Instead, use .*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Use .* instead\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.IOException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketClosedException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.EOFException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.nio.channels.CancelledKeyException\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 2, \"exceptiontype\": \".*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Unknown job [^ ]+ being deleted.*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Error executing shell command .+ No such process.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\".*attempt to override final parameter.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"[^ ]+ is a deprecated filesystem name. Use.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\"}\n ]\n}\n"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>hdfs-site.xml</strong> for this role only.",
"display_name": "Balancer Advanced Configuration Snippet (Safety Valve) for hdfs-site.xml",
"name": "balancer_config_safety_valve",
"value": null
},
{
"desc": "The policy that should be used to rebalance HDFS storage. The default DataNode policy balances the storage at the DataNode level. This is similar to the balancing policy from prior releases. The BlockPool policy balances the storage at the block pool level as well as at the Datanode level. The BlockPool policy is relevant only to a Federated HDFS service.",
"display_name": "Rebalancing Policy",
"name": "rebalancing_policy",
"value": "DataNode"
},
{
"desc": "The percentage deviation from average utilization, after which a node will be rebalanced. (for example, '10.0' for 10%)",
"display_name": "Rebalancing Threshold",
"name": "rebalancer_threshold",
"value": "10.0"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "Maximum size for the Java Process heap memory. Passed to Java -Xmx. Measured in bytes.",
"display_name": "Java Heap Size of Balancer in Bytes",
"name": "balancer_java_heapsize",
"value": "1073741824"
}
]

View File

@ -0,0 +1,380 @@
[
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Log Directory Free Space Monitoring Percentage Thresholds",
"name": "log_directory_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "<p>This file contains the rules which govern how log messages are turned into events by the custom log4j appender that this role loads. It is in JSON format, and is composed of a list of rules. Every log message is evaluated against each of these rules in turn to decide whether or not to send an event for that message.</p><p>Each rule has some or all of the following fields:</p><ul><li><span class='code'>alert</span> - whether or not events generated from this rule should be promoted to alerts. A value of \"true\" will cause alerts to be generated. If not specified, the default is \"false\".</li><li><span class='code'>rate</span> <strong>(mandatory)</strong> - the maximum number of log messages matching this rule that may be sent as events every minute. If more than <tt>rate</tt> matching log messages are received in a single minute, the extra messages are ignored. If rate is less than 0, the number of messages per minute is unlimited.</li><li><span class='code'>periodminutes</span> - the number of minutes during which the publisher will only publish <tt>rate</tt> events or fewer. If not specified, the default is <strong>one minute</strong></li><li><span class='code'>threshold</span> - apply this rule only to messages with this log4j severity level or above. An example is \"WARN\" for warning level messages or higher.</li><li><span class='code'>content</span> - match only those messages whose contents match this regular expression.</li><li><span class='code'>exceptiontype</span> - match only those messages which are part of an exception message. The exception type must match this regular expression.</li></ul><br/><p>Example:<span class='code'>{\"alert\": false, \"rate\": 10, \"exceptiontype\": \"java.lang.StringIndexOutOfBoundsException\"}</span></p><p>This rule will send events to Cloudera Manager for every <span class='code'>StringIndexOutOfBoundsException</span>, up to a maximum of 10 every minute.</p>",
"display_name": "Rules to Extract Events from Log Files",
"name": "log_event_whitelist",
"value": "{\n \"version\": \"0\",\n \"rules\": [\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"FATAL\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Instead, use .*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Use .* instead\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.IOException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketClosedException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.EOFException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.nio.channels.CancelledKeyException\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 5, \"content\":\"Datanode registration failed\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 2, \"exceptiontype\": \".*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Got a command from standby NN - ignoring command:.*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Unknown job [^ ]+ being deleted.*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Error executing shell command .+ No such process.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\".*attempt to override final parameter.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"[^ ]+ is a deprecated filesystem name. Use.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\"}\n ]\n}\n"
},
{
"desc": "Specifies the maximum number of threads to use for transferring data in and out of the DataNode.",
"display_name": "Maximum Number of Transfer Threads",
"name": "dfs_datanode_max_xcievers",
"value": "4096"
},
{
"desc": "Comma-separated list of DataNode plug-ins to be activated. If one plug-in cannot be loaded, all the plug-ins are ignored.",
    "display_name": "DataNode Plugins",
"name": "dfs_datanode_plugins_list",
"value": ""
},
{
"desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup I/O Weight",
"name": "rm_io_weight",
"value": "500"
},
{
"desc": "In some workloads, the data read from HDFS is known to be significantly large enough that it is unlikely to be useful to cache it in the operating system buffer cache. In this case, the DataNode may be configured to automatically purge all data from the buffer cache after it is delivered to the client. This may improve performance for some workloads by freeing buffer cache spare usage for more cacheable data. This behavior will always be disabled for workloads that read only short sections of a block (e.g HBase random-IO workloads). This property is supported in CDH3u3 or later deployments.",
"display_name": "Enable purging cache after reads",
"name": "dfs_datanode_drop_cache_behind_reads",
"value": "false"
},
{
"desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup CPU Shares",
"name": "rm_cpu_shares",
"value": "1024"
},
{
"desc": "Comma-delimited list of directories on the local file system where the DataNode stores HDFS block data. Typical values are /data/N/dfs/dn for N = 1, 2, 3... These directories should be mounted using the noatime option and the disks should be configured using JBOD. RAID is not recommended.",
"display_name": "DataNode Data Directory",
"name": "dfs_data_dir_list",
"value": null
},
{
"desc": "The number of volumes that are allowed to fail before a DataNode stops offering service. By default, any volume failure will cause a DataNode to shutdown.",
"display_name": "DataNode Failed Volumes Tolerated",
"name": "dfs_datanode_failed_volumes_tolerated",
"value": "0"
},
{
"desc": "In some workloads, the data written to HDFS is known to be significantly large enough that it is unlikely to be useful to cache it in the operating system buffer cache. In this case, the DataNode may be configured to automatically purge all data from the buffer cache after it is written to disk. This may improve performance for some workloads by freeing buffer cache spare usage for more cacheable data. This property is supported in CDH3u3 or later deployments.",
"display_name": "Enable purging cache after writes",
"name": "dfs_datanode_drop_cache_behind_writes",
"value": "false"
},
{
"desc": "If enabled, the DataNode binds to the wildcard address (\"0.0.0.0\") on all of its ports.",
"display_name": "Bind DataNode to Wildcard Address",
"name": "dfs_datanode_bind_wildcard",
"value": "false"
},
{
"desc": "The number of server threads for the DataNode.",
"display_name": "Handler Count",
"name": "dfs_datanode_handler_count",
"value": "3"
},
{
"desc": "When computing the overall DataNode health, consider the host's health.",
"display_name": "DataNode Host Health Test",
"name": "datanode_host_health_enabled",
"value": "true"
},
{
"desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.",
"display_name": "Automatically Restart Process",
"name": "process_auto_restart",
"value": "true"
},
{
"desc": "The maximum number of rolled log files to keep for DataNode logs. Typically used by log4j.",
"display_name": "DataNode Maximum Log File Backups",
"name": "max_log_backup_index",
"value": "10"
},
{
"desc": "<p>The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><span class='code'>triggerName</span> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific role. </li><li><span class='code'>triggerExpression</span> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. <li><span class='code'>streamThreshold</span> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. <li><span class='code'>enabled</span> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</p><p>For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:red\",\n \"streamThreshold\": 0}, \"enabled\": \"true\"]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Role Triggers",
"name": "role_triggers",
"value": "[]"
},
{
"desc": "Timeout in seconds for the Hue Thrift server running on each DataNode",
"display_name": "Hue Thrift Server Timeout",
"name": "dfs_thrift_timeout",
"value": "60"
},
{
"desc": "The period to review when computing the moving average of garbage collection time.",
"display_name": "Garbage Collection Duration Monitoring Period",
"name": "datanode_gc_duration_window",
"value": "5"
},
{
"desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Soft Limit",
"name": "rm_memory_soft_limit",
"value": "-1"
},
{
"desc": "Whether DataNodes should use DataNode hostnames when connecting to DataNodes for data transfer. This property is supported in CDH3u4 or later deployments.",
"display_name": "Use DataNode Hostname",
"name": "dfs_datanode_use_datanode_hostname",
"value": "false"
},
{
"desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Java Configuration Options for DataNode",
"name": "datanode_java_opts",
"value": "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:-CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+CMSParallelRemarkEnabled"
},
{
"desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.",
"display_name": "File Descriptor Monitoring Thresholds",
"name": "datanode_fd_thresholds",
"value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}"
},
{
"desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Hard Limit",
"name": "rm_memory_hard_limit",
"value": "-1"
},
{
"desc": "The health test thresholds on the duration of the metrics request to the web server.",
"display_name": "Web Metric Collection Duration",
"name": "datanode_web_metric_collection_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"10000.0\"}"
},
{
"desc": "Enables the health test that the DataNode's process state is consistent with the role configuration",
"display_name": "DataNode Process Health Test",
"name": "datanode_scm_health_enabled",
"value": "true"
},
{
"desc": "Advanced Configuration Snippet (Safety Valve) for Hadoop Metrics2. Properties will be inserted into <strong>hadoop-metrics2.properties</strong>.",
"display_name": "Hadoop Metrics2 Advanced Configuration Snippet (Safety Valve)",
"name": "hadoop_metrics2_safety_valve",
"value": null
},
{
"desc": "For advanced use only, a string to be inserted into <strong>log4j.properties</strong> for this role only.",
"display_name": "DataNode Logging Advanced Configuration Snippet (Safety Valve)",
"name": "log4j_safety_valve",
"value": null
},
{
"desc": "DataNode Policy for picking which volume should get a new block. The Available Space policy is only available starting with CDH 4.3.",
"display_name": "DataNode Volume Choosing Policy",
"name": "dfs_datanode_volume_choosing_policy",
"value": "org.apache.hadoop.hdfs.server.datanode.fsdataset.RoundRobinVolumeChoosingPolicy"
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.",
"display_name": "Log Directory Free Space Monitoring Absolute Thresholds",
"name": "log_directory_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "Port for the DataNode HTTP web UI. Combined with the DataNode's hostname to build its HTTP address.",
"display_name": "DataNode HTTP Web UI Port",
"name": "dfs_datanode_http_port",
"value": "50075"
},
{
"desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.",
"display_name": "Maximum Process File Descriptors",
"name": "rlimit_fds",
"value": null
},
{
"desc": "Directory where DataNode will place its log files.",
"display_name": "DataNode Log Directory",
"name": "datanode_log_dir",
"value": "/var/log/hadoop-hdfs"
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Health Alerts for this Role",
"name": "enable_alerts",
"value": "false"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>hdfs-site.xml</strong> for this role only.",
"display_name": "DataNode Advanced Configuration Snippet (Safety Valve) for hdfs-site.xml",
"name": "datanode_config_safety_valve",
"value": null
},
{
"desc": "While reading block files, the DataNode can use the posix_fadvise system call to explicitly page data into the operating system buffer cache ahead of the current reader's position. This can improve performance especially when disks are highly contended. This configuration specifies the number of bytes ahead of the current read position which the DataNode will attempt to read ahead. A value of 0 disables this feature. This property is supported in CDH3u3 or later deployments.",
"display_name": "Number of read ahead bytes",
"name": "dfs_datanode_readahead_bytes",
"value": "4194304"
},
{
"desc": "The maximum size, in megabytes, per log file for DataNode logs. Typically used by log4j.",
"display_name": "DataNode Max Log Size",
"name": "max_log_size",
"value": "200"
},
{
"desc": "The maximum amount of memory a DataNode may use to cache data blocks in memory. Setting it to zero will disable caching.",
"display_name": "Maximum Memory Used for Caching",
"name": "dfs_datanode_max_locked_memory",
"value": "4294967296"
},
{
"desc": "Only used when the DataNode Volume Choosing Policy is set to Available Space. Controls how much DataNode volumes are allowed to differ in terms of bytes of free disk space before they are considered imbalanced. If the free space of all the volumes are within this range of each other, the volumes will be considered balanced and block assignments will be done on a pure round robin basis.",
"display_name": "Available Space Policy Balanced Threshold",
"name": "dfs_datanode_available_space_balanced_threshold",
"value": "10737418240"
},
{
"desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.",
"display_name": "Dump Heap When Out of Memory",
"name": "oom_heap_dump_enabled",
"value": "false"
},
{
"desc": "The period to review when computing unexpected exits.",
"display_name": "Unexpected Exits Monitoring Period",
"name": "unexpected_exits_window",
"value": "5"
},
{
"desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.",
"display_name": "Unexpected Exits Thresholds",
"name": "unexpected_exits_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "The health test thresholds of free space in a DataNode. Specified as a percentage of the capacity on the DataNode.",
"display_name": "DataNode Free Space Monitoring Thresholds",
"name": "datanode_free_space_thresholds",
"value": "{\"critical\":\"10.0\",\"warning\":\"20.0\"}"
},
{
"desc": "Port for the various DataNode Protocols. Combined with the DataNode's hostname to build its IPC port address.",
"display_name": "DataNode Protocol Port",
"name": "dfs_datanode_ipc_port",
"value": "50020"
},
{
"desc": "Port for DataNode's XCeiver Protocol. Combined with the DataNode's hostname to build its address.",
"display_name": "DataNode Transceiver Port",
"name": "dfs_datanode_port",
"value": "50010"
},
{
"desc": "Minimum number of running threads for the Hue Thrift server running on each DataNode",
"display_name": "Hue Thrift Server Min Threadcount",
"name": "dfs_thrift_threads_min",
"value": "10"
},
{
"desc": "The health test thresholds for the weighted average time spent in Java garbage collection. Specified as a percentage of elapsed wall clock time.",
"display_name": "Garbage Collection Duration Thresholds",
"name": "datanode_gc_duration_thresholds",
"value": "{\"critical\":\"60.0\",\"warning\":\"30.0\"}"
},
{
"desc": "If this configuration is enabled, the DataNode will instruct the operating system to enqueue all written data to the disk immediately after it is written. This differs from the usual OS policy which may wait for up to 30 seconds before triggering writeback. This may improve performance for some workloads by smoothing the IO profile for data written to disk. This property is supported in CDH3u3 or later deployments.",
"display_name": "Enable immediate enqueuing of data to disk after writes",
"name": "dfs_datanode_sync_behind_writes",
"value": "false"
},
{
"desc": "The health test thresholds of the number of blocks on a DataNode",
"display_name": "DataNode Block Count Thresholds",
"name": "datanode_block_count_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"200000.0\"}"
},
{
"desc": "Permissions for the directories on the local file system where the DataNode stores its blocks. The permissions must be octal. 755 and 700 are typical values.",
"display_name": "DataNode Data Directory Permissions",
"name": "dfs_datanode_data_dir_perm",
"value": "700"
},
{
"desc": "Reserved space in bytes per volume for non Distributed File System (DFS) use.",
"display_name": "Reserved Space for Non DFS Use",
"name": "dfs_datanode_du_reserved",
"value": "10737418240"
},
{
"desc": "The amount of time to wait for the DataNode to fully start up and connect to the NameNode before enforcing the connectivity check.",
"display_name": "DataNode Connectivity Tolerance at Startup",
"name": "datanode_connectivity_tolerance",
"value": "180"
},
{
"desc": "The base port where the secure DataNode web UI listens. Combined with the DataNode's hostname to build its secure web UI address.",
"display_name": "Secure DataNode Web UI Port (SSL)",
"name": "dfs_datanode_https_port",
"value": "50475"
},
{
"desc": "Maximum amount of bandwidth that each DataNode can use for balancing. Specified in bytes per second.",
"display_name": "DataNode Balancing Bandwidth",
"name": "dfs_balance_bandwidthPerSec",
"value": "10485760"
},
{
"desc": "Maximum number of running threads for the Hue Thrift server running on each DataNode",
"display_name": "Hue Thrift Server Max Threadcount",
"name": "dfs_thrift_threads_max",
"value": "20"
},
{
"desc": "Only used when the DataNode Volume Choosing Policy is set to Available Space. Controls what percentage of new block allocations will be sent to volumes with more available disk space than others. This setting should be in the range 0.0 - 1.0, though in practice 0.5 - 1.0, since there should be no reason to prefer that volumes with less available disk space receive more block allocations.",
"display_name": "Available Space Policy Balanced Preference",
"name": "dfs_datanode_available_space_balanced_preference",
"value": "0.75"
},
{
"desc": "The health test thresholds of failed volumes in a DataNode.",
"display_name": "DataNode Volume Failures Thresholds",
"name": "datanode_volume_failures_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.",
"display_name": "Heap Dump Directory",
"name": "oom_heap_dump_dir",
"value": "/tmp"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.",
"display_name": "Kill When Out of Memory",
"name": "oom_sigkill_enabled",
"value": "true"
},
{
"desc": "The minimum log level for DataNode logs",
"display_name": "DataNode Logging Threshold",
"name": "log_threshold",
"value": "INFO"
},
{
"desc": "Enables the health test that the Cloudera Manager Agent can successfully contact and gather metrics from the web server.",
"display_name": "Web Metric Collection",
"name": "datanode_web_metric_collection_enabled",
"value": "true"
},
{
"desc": "Maximum size for the Java Process heap memory. Passed to Java -Xmx. Measured in bytes.",
"display_name": "Java Heap Size of DataNode in Bytes",
"name": "datanode_java_heapsize",
"value": "1073741824"
},
{
"desc": "Enables the health test that verifies the DataNode is connected to the NameNode",
"display_name": "DataNode Connectivity Health Test",
"name": "datanode_connectivity_health_enabled",
"value": "true"
}
]

View File

@ -0,0 +1,170 @@
[
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Log Directory Free Space Monitoring Percentage Thresholds",
"name": "log_directory_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "Directory where Failover Controller will place its log files.",
"display_name": "Failover Controller Log Directory",
"name": "failover_controller_log_dir",
"value": "/var/log/hadoop-hdfs"
},
{
"desc": "<p>This file contains the rules which govern how log messages are turned into events by the custom log4j appender that this role loads. It is in JSON format, and is composed of a list of rules. Every log message is evaluated against each of these rules in turn to decide whether or not to send an event for that message.</p><p>Each rule has some or all of the following fields:</p><ul><li><span class='code'>alert</span> - whether or not events generated from this rule should be promoted to alerts. A value of \"true\" will cause alerts to be generated. If not specified, the default is \"false\".</li><li><span class='code'>rate</span> <strong>(mandatory)</strong> - the maximum number of log messages matching this rule that may be sent as events every minute. If more than <tt>rate</tt> matching log messages are received in a single minute, the extra messages are ignored. If rate is less than 0, the number of messages per minute is unlimited.</li><li><span class='code'>periodminutes</span> - the number of minutes during which the publisher will only publish <tt>rate</tt> events or fewer. If not specified, the default is <strong>one minute</strong></li><li><span class='code'>threshold</span> - apply this rule only to messages with this log4j severity level or above. An example is \"WARN\" for warning level messages or higher.</li><li><span class='code'>content</span> - match only those messages whose contents match this regular expression.</li><li><span class='code'>exceptiontype</span> - match only those messages which are part of an exception message. The exception type must match this regular expression.</li></ul><br/><p>Example:<span class='code'>{\"alert\": false, \"rate\": 10, \"exceptiontype\": \"java.lang.StringIndexOutOfBoundsException\"}</span></p><p>This rule will send events to Cloudera Manager for every <span class='code'>StringIndexOutOfBoundsException</span>, up to a maximum of 10 every minute.</p>",
"display_name": "Rules to Extract Events from Log Files",
"name": "log_event_whitelist",
"value": "{\n \"version\": \"0\",\n \"rules\": [\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"FATAL\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 2, \"exceptiontype\": \".*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\"}\n ]\n}"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>hdfs-site.xml</strong> for this role only.",
"display_name": "Failover Controller Advanced Configuration Snippet (Safety Valve) for hdfs-site.xml",
"name": "fc_config_safety_valve",
"value": null
},
{
"desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.",
"display_name": "Automatically Restart Process",
"name": "process_auto_restart",
"value": "false"
},
{
"desc": "The maximum number of rolled log files to keep for Failover Controller logs. Typically used by log4j.",
"display_name": "Failover Controller Maximum Log File Backups",
"name": "max_log_backup_index",
"value": "10"
},
{
"desc": "<p>The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><span class='code'>triggerName</span> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific role. </li><li><span class='code'>triggerExpression</span> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. <li><span class='code'>streamThreshold</span> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. <li><span class='code'>enabled</span> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</p><p>For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:red\",\n \"streamThreshold\": 0}, \"enabled\": \"true\"]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Role Triggers",
"name": "role_triggers",
"value": "[]"
},
{
"desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Soft Limit",
"name": "rm_memory_soft_limit",
"value": "-1"
},
{
"desc": "Maximum size for the Java Process heap memory. Passed to Java -Xmx. Measured in bytes.",
"display_name": "Java Heap Size of Failover Controller in Bytes",
"name": "failover_controller_java_heapsize",
"value": "268435456"
},
{
"desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Hard Limit",
"name": "rm_memory_hard_limit",
"value": "-1"
},
{
"desc": "The period to review when computing unexpected exits.",
"display_name": "Unexpected Exits Monitoring Period",
"name": "unexpected_exits_window",
"value": "5"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>log4j.properties</strong> for this role only.",
"display_name": "Failover Controller Logging Advanced Configuration Snippet (Safety Valve)",
"name": "log4j_safety_valve",
"value": null
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.",
"display_name": "Log Directory Free Space Monitoring Absolute Thresholds",
"name": "log_directory_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.",
"display_name": "Maximum Process File Descriptors",
"name": "rlimit_fds",
"value": null
},
{
"desc": "When computing the overall Failover Controller health, consider the host's health.",
"display_name": "Failover Controller Host Health Test",
"name": "failovercontroller_host_health_enabled",
"value": "true"
},
{
"desc": "The maximum size, in megabytes, per log file for Failover Controller logs. Typically used by log4j.",
"display_name": "Failover Controller Max Log Size",
"name": "max_log_size",
"value": "200"
},
{
"desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.",
"display_name": "Kill When Out of Memory",
"name": "oom_sigkill_enabled",
"value": "true"
},
{
"desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.",
"display_name": "File Descriptor Monitoring Thresholds",
"name": "failovercontroller_fd_thresholds",
"value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}"
},
{
"desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.",
"display_name": "Dump Heap When Out of Memory",
"name": "oom_heap_dump_enabled",
"value": "false"
},
{
"desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.",
"display_name": "Unexpected Exits Thresholds",
"name": "unexpected_exits_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Health Alerts for this Role",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "Enables the health test that the Failover Controller's process state is consistent with the role configuration",
"display_name": "Failover Controller Process Health Test",
"name": "failovercontroller_scm_health_enabled",
"value": "true"
},
{
"desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Java Configuration Options for Failover Controller",
"name": "failover_controller_java_opts",
"value": ""
},
{
"desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup I/O Weight",
"name": "rm_io_weight",
"value": "500"
},
{
"desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup CPU Shares",
"name": "rm_cpu_shares",
"value": "1024"
},
{
"desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.",
"display_name": "Heap Dump Directory",
"name": "oom_heap_dump_dir",
"value": "/tmp"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "The minimum log level for Failover Controller logs",
"display_name": "Failover Controller Logging Threshold",
"name": "log_threshold",
"value": "INFO"
}
]

View File

@ -0,0 +1,50 @@
[
{
"desc": "For advanced use only, a string to be inserted into the client configuration for <strong>hdfs-site.xml</strong>.",
"display_name": "HDFS Client Advanced Configuration Snippet (Safety Valve) for hdfs-site.xml",
"name": "hdfs_client_config_safety_valve",
"value": null
},
{
"desc": "For advanced use only, key-value pairs (one on each line) to be inserted into the client configuration for <strong>hadoop-env.sh</strong>.",
"display_name": "HDFS Client Environment Advanced Configuration Snippet for hadoop-env.sh (Safety Valve)",
"name": "hdfs_client_env_safety_valve",
"value": null
},
{
"desc": "The priority level that the client configuration will have in the Alternatives system on the hosts. Higher priority levels will cause Alternatives to prefer this configuration over any others.",
"display_name": "Alternatives Priority",
"name": "client_config_priority",
"value": "90"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "Maximum size for the Java process heap memory. Passed to Java -Xmx. Measured in bytes.",
"display_name": "Client Java Heap Size in Bytes",
"name": "hdfs_client_java_heapsize",
"value": "268435456"
},
{
"desc": "Enable HDFS short circuit read. This allows a client co-located with the DataNode to read HDFS file blocks directly. This gives a performance boost to distributed clients that are aware of locality.",
"display_name": "Enable HDFS Short Circuit Read",
"name": "dfs_client_read_shortcircuit",
"value": "false"
},
{
"desc": "These are Java command line arguments. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Client Java Configuration Options",
"name": "hbase_client_java_opts",
"value": "-Djava.net.preferIPv4Stack=true"
},
{
"desc": "Move deleted files to the trash so that they can be recovered if necessary. This client side configuration takes effect only if the HDFS service-wide trash is disabled (NameNode Filesystem Trash Interval set to 0) and is ignored otherwise. The trash is not automatically emptied when enabled with this configuration.",
"display_name": "Use Trash",
"name": "dfs_client_use_trash",
"value": "false"
}
]

View File

@ -0,0 +1,194 @@
[
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Log Directory Free Space Monitoring Percentage Thresholds",
"name": "log_directory_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.",
"display_name": "File Descriptor Monitoring Thresholds",
"name": "httpfs_fd_thresholds",
"value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}"
},
{
"desc": "The user that the HttpFS server process should run as.",
"display_name": "System User",
"name": "httpfs_process_username",
"value": "httpfs"
},
{
"desc": "The group that the HttpFS server process should run as.",
"display_name": "System Group",
"name": "httpfs_process_groupname",
"value": "httpfs"
},
{
"desc": "When computing the overall HttpFS health, consider the host's health.",
"display_name": "HttpFS Host Health Test",
"name": "httpfs_host_health_enabled",
"value": "true"
},
{
"desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.",
"display_name": "Automatically Restart Process",
"name": "process_auto_restart",
"value": "false"
},
{
"desc": "The maximum number of rolled log files to keep for HttpFS logs. Typically used by log4j.",
"display_name": "HttpFS Maximum Log File Backups",
"name": "max_log_backup_index",
"value": "10"
},
{
"desc": "<p>The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><span class='code'>triggerName</span> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific role. </li><li><span class='code'>triggerExpression</span> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. <li><span class='code'>streamThreshold</span> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. <li><span class='code'>enabled</span> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</p><p>For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:red\",\n \"streamThreshold\": 0}, \"enabled\": \"true\"]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Role Triggers",
"name": "role_triggers",
"value": "[]"
},
{
"desc": "Enables the health test that the HttpFS's process state is consistent with the role configuration",
"display_name": "HttpFS Process Health Test",
"name": "httpfs_scm_health_enabled",
"value": "true"
},
{
"desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Soft Limit",
"name": "rm_memory_soft_limit",
"value": "-1"
},
{
"desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Hard Limit",
"name": "rm_memory_hard_limit",
"value": "-1"
},
{
"desc": "The period to review when computing unexpected exits.",
"display_name": "Unexpected Exits Monitoring Period",
"name": "unexpected_exits_window",
"value": "5"
},
{
"desc": "Directory where HttpFS will place its log files.",
"display_name": "HttpFS Log Directory",
"name": "httpfs_log_dir",
"value": "/var/log/hadoop-httpfs"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>log4j.properties</strong> for this role only.",
"display_name": "HttpFS Logging Advanced Configuration Snippet (Safety Valve)",
"name": "log4j_safety_valve",
"value": null
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.",
"display_name": "Log Directory Free Space Monitoring Absolute Thresholds",
"name": "log_directory_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.",
"display_name": "Maximum Process File Descriptors",
"name": "rlimit_fds",
"value": null
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Health Alerts for this Role",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>httpfs-site.xml</strong> for this role only.",
"display_name": "HttpFS Advanced Configuration Snippet (Safety Valve) for httpfs-site.xml",
"name": "httpfs_config_safety_valve",
"value": null
},
{
"desc": "The maximum size, in megabytes, per log file for HttpFS logs. Typically used by log4j.",
"display_name": "HttpFS Max Log Size",
"name": "max_log_size",
"value": "200"
},
{
"desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.",
"display_name": "Kill When Out of Memory",
"name": "oom_sigkill_enabled",
"value": "true"
},
{
"desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Java Configuration Options for HttpFS",
"name": "httpfs_java_opts",
"value": ""
},
{
"desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.",
"display_name": "Dump Heap When Out of Memory",
"name": "oom_heap_dump_enabled",
"value": "false"
},
{
"desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.",
"display_name": "Unexpected Exits Thresholds",
"name": "unexpected_exits_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "The secret to use for signing client authentication tokens.",
"display_name": "Signature Secret",
"name": "hdfs_httpfs_signature_secret",
"value": "hadoop httpfs secret"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "Maximum size for the Java Process heap memory. Passed to Java -Xmx. Measured in bytes.",
"display_name": "Java Heap Size of HttpFS in Bytes",
"name": "httpfs_java_heapsize",
"value": "268435456"
},
{
"desc": "The port for the administration interface.",
"display_name": "Administration Port",
"name": "hdfs_httpfs_admin_port",
"value": "14001"
},
{
"desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup I/O Weight",
"name": "rm_io_weight",
"value": "500"
},
{
"desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup CPU Shares",
"name": "rm_cpu_shares",
"value": "1024"
},
{
"desc": "The HTTP port where the REST interface to HDFS is available.",
"display_name": "HTTP Port",
"name": "hdfs_httpfs_http_port",
"value": "14000"
},
{
"desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.",
"display_name": "Heap Dump Directory",
"name": "oom_heap_dump_dir",
"value": "/tmp"
},
{
"desc": "The minimum log level for HttpFS logs",
"display_name": "HttpFS Logging Threshold",
"name": "log_threshold",
"value": "INFO"
}
]

View File

@ -0,0 +1,242 @@
[
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Log Directory Free Space Monitoring Percentage Thresholds",
"name": "log_directory_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "<p>This file contains the rules which govern how log messages are turned into events by the custom log4j appender that this role loads. It is in JSON format, and is composed of a list of rules. Every log message is evaluated against each of these rules in turn to decide whether or not to send an event for that message.</p><p>Each rule has some or all of the following fields:</p><ul><li><span class='code'>alert</span> - whether or not events generated from this rule should be promoted to alerts. A value of \"true\" will cause alerts to be generated. If not specified, the default is \"false\".</li><li><span class='code'>rate</span> <strong>(mandatory)</strong> - the maximum number of log messages matching this rule that may be sent as events every minute. If more than <tt>rate</tt> matching log messages are received in a single minute, the extra messages are ignored. If rate is less than 0, the number of messages per minute is unlimited.</li><li><span class='code'>periodminutes</span> - the number of minutes during which the publisher will only publish <tt>rate</tt> events or fewer. If not specified, the default is <strong>one minute</strong></li><li><span class='code'>threshold</span> - apply this rule only to messages with this log4j severity level or above. An example is \"WARN\" for warning level messages or higher.</li><li><span class='code'>content</span> - match only those messages whose contents match this regular expression.</li><li><span class='code'>exceptiontype</span> - match only those messages which are part of an exception message. The exception type must match this regular expression.</li></ul><br/><p>Example:<span class='code'>{\"alert\": false, \"rate\": 10, \"exceptiontype\": \"java.lang.StringIndexOutOfBoundsException\"}</span></p><p>This rule will send events to Cloudera Manager for every <span class='code'>StringIndexOutOfBoundsException</span>, up to a maximum of 10 every minute.</p>",
"display_name": "Rules to Extract Events from Log Files",
"name": "log_event_whitelist",
"value": "{\n \"version\": \"0\",\n \"rules\": [\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"FATAL\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 2, \"exceptiontype\": \".*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\"}\n ]\n}"
},
{
"desc": "The health test thresholds on the duration of the metrics request to the web server.",
"display_name": "Web Metric Collection Duration",
"name": "journalnode_web_metric_collection_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"10000.0\"}"
},
{
"desc": "Maximum size for the Java Process heap memory. Passed to Java -Xmx. Measured in bytes.",
"display_name": "Java Heap Size of JournalNode in Bytes",
"name": "journalNode_java_heapsize",
"value": "268435456"
},
{
"desc": "Enables the health test that the Cloudera Manager Agent can successfully contact and gather metrics from the web server.",
"display_name": "Web Metric Collection",
"name": "journalnode_web_metric_collection_enabled",
"value": "true"
},
{
"desc": "Enables the health test that the JournalNode's process state is consistent with the role configuration",
"display_name": "JournalNode Process Health Test",
"name": "journalnode_scm_health_enabled",
"value": "true"
},
{
"desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.",
"display_name": "Automatically Restart Process",
"name": "process_auto_restart",
"value": "true"
},
{
"desc": "The maximum number of rolled log files to keep for JournalNode logs. Typically used by log4j.",
"display_name": "JournalNode Maximum Log File Backups",
"name": "max_log_backup_index",
"value": "10"
},
{
"desc": "The health check thresholds for monitoring of free space on the filesystem that contains the JournalNode's edits directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Edits Directory Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Edits Directory Free Space Monitoring Percentage Thresholds",
"name": "journalnode_edits_directory_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Soft Limit",
"name": "rm_memory_soft_limit",
"value": "-1"
},
{
"desc": "Directory on the local file system where the NameNode's edits are written.",
"display_name": "JournalNode Edits Directory",
"name": "dfs_journalnode_edits_dir",
"value": null
},
{
"desc": "The period to review when computing the moving average of garbage collection time.",
"display_name": "Garbage Collection Duration Monitoring Period",
"name": "journalnode_gc_duration_window",
"value": "5"
},
{
"desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Hard Limit",
"name": "rm_memory_hard_limit",
"value": "-1"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>hdfs-site.xml</strong> for this role only.",
"display_name": "JournalNode Advanced Configuration Snippet (Safety Valve) for hdfs-site.xml",
"name": "jn_config_safety_valve",
"value": null
},
{
"desc": "The period to review when computing unexpected exits.",
"display_name": "Unexpected Exits Monitoring Period",
"name": "unexpected_exits_window",
"value": "5"
},
{
"desc": "When computing the overall JournalNode health, consider the host's health.",
"display_name": "JournalNode Host Health Test",
"name": "journalnode_host_health_enabled",
"value": "true"
},
{
"desc": "The amount of time at JournalNode startup allowed for the active NameNode to get in sync with the JournalNode.",
"display_name": "Active NameNode Sync Status Startup Tolerance",
"name": "journalnode_sync_status_startup_tolerance",
"value": "180"
},
{
"desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.",
"display_name": "File Descriptor Monitoring Thresholds",
"name": "journalnode_fd_thresholds",
"value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>log4j.properties</strong> for this role only.",
"display_name": "JournalNode Logging Advanced Configuration Snippet (Safety Valve)",
"name": "log4j_safety_valve",
"value": null
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.",
"display_name": "Log Directory Free Space Monitoring Absolute Thresholds",
"name": "log_directory_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "Enables the health check that verifies the active NameNode's sync status to the JournalNode",
"display_name": "Active NameNode Sync Status Health Check",
"name": "journalnode_sync_status_enabled",
"value": "true"
},
{
"desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.",
"display_name": "Maximum Process File Descriptors",
"name": "rlimit_fds",
"value": null
},
{
"desc": "The health test thresholds for the weighted average time spent in Java garbage collection. Specified as a percentage of elapsed wall clock time.",
"display_name": "Garbage Collection Duration Thresholds",
"name": "journalnode_gc_duration_thresholds",
"value": "{\"critical\":\"60.0\",\"warning\":\"30.0\"}"
},
{
"desc": "Port for the JournalNode's RPC. Combined with the JournalNode's hostname to build its RPC address.",
"display_name": "JournalNode RPC Port",
"name": "dfs_journalnode_rpc_port",
"value": "8485"
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Health Alerts for this Role",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "The maximum size, in megabytes, per log file for JournalNode logs. Typically used by log4j.",
"display_name": "JournalNode Max Log Size",
"name": "max_log_size",
"value": "200"
},
{
"desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.",
"display_name": "Kill When Out of Memory",
"name": "oom_sigkill_enabled",
"value": "true"
},
{
"desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.",
"display_name": "Dump Heap When Out of Memory",
"name": "oom_heap_dump_enabled",
"value": "false"
},
{
"desc": "If enabled, the JournalNode binds to the wildcard address (\"0.0.0.0\") on all of its ports.",
"display_name": "Bind JournalNode to Wildcard Address",
"name": "journalnode_bind_wildcard",
"value": "false"
},
{
"desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.",
"display_name": "Unexpected Exits Thresholds",
"name": "unexpected_exits_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "The health check thresholds for monitoring of free space on the filesystem that contains the JournalNode's edits directory.",
"display_name": "Edits Directory Free Space Monitoring Absolute Thresholds",
"name": "journalnode_edits_directory_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "<p>The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part of the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><span class='code'>triggerName</span> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific role. </li><li><span class='code'>triggerExpression</span> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. <li><span class='code'>streamThreshold</span> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. <li><span class='code'>enabled</span> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</p><p>For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:red\",\n \"streamThreshold\": 0}, \"enabled\": \"true\"]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Role Triggers",
"name": "role_triggers",
"value": "[]"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "Directory where JournalNode will place its log files.",
"display_name": "JournalNode Log Directory",
"name": "journalnode_log_dir",
"value": "/var/log/hadoop-hdfs"
},
{
"desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Java Configuration Options for JournalNode",
"name": "journalNode_java_opts",
"value": ""
},
{
"desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup I/O Weight",
"name": "rm_io_weight",
"value": "500"
},
{
"desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup CPU Shares",
"name": "rm_cpu_shares",
"value": "1024"
},
{
"desc": "Port for the JournalNode's HTTP web UI. Combined with the JournalNode's hostname to build its HTTP address.",
"display_name": "JournalNode HTTP Port",
"name": "dfs_journalnode_http_port",
"value": "8480"
},
{
"desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.",
"display_name": "Heap Dump Directory",
"name": "oom_heap_dump_dir",
"value": "/tmp"
},
{
"desc": "The minimum log level for JournalNode logs",
"display_name": "JournalNode Logging Threshold",
"name": "log_threshold",
"value": "INFO"
}
]

View File

@ -0,0 +1,452 @@
[
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Log Directory Free Space Monitoring Percentage Thresholds",
"name": "log_directory_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "The amount of time allowed after this role is started that failures of health checks that rely on communication with this role will be tolerated.",
"display_name": "Health Check Startup Tolerance",
"name": "namenode_startup_tolerance",
"value": "5"
},
{
"desc": "The health test thresholds of failed status directories in a NameNode.",
"display_name": "NameNode Directory Failures Thresholds",
"name": "namenode_directory_failures_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "Name of the journal located on each JournalNode's filesystem.",
"display_name": "Quorum-based Storage Journal name",
"name": "dfs_namenode_quorum_journal_name",
"value": null
},
{
"desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.",
"display_name": "Dump Heap When Out of Memory",
"name": "oom_heap_dump_enabled",
"value": "false"
},
{
"desc": "The period to review when computing the moving average of garbage collection time.",
"display_name": "Garbage Collection Duration Monitoring Period",
"name": "namenode_gc_duration_window",
"value": "5"
},
{
"desc": "The number of server threads for the NameNode.",
"display_name": "NameNode Handler Count",
"name": "dfs_namenode_handler_count",
"value": "30"
},
{
"desc": "Enables the health test that the NameNode is not in safemode",
"display_name": "NameNode Safemode Health Test",
"name": "namenode_safe_mode_enabled",
"value": "true"
},
{
"desc": "<p>This file contains the rules which govern how log messages are turned into events by the custom log4j appender that this role loads. It is in JSON format, and is composed of a list of rules. Every log message is evaluated against each of these rules in turn to decide whether or not to send an event for that message.</p><p>Each rule has some or all of the following fields:</p><ul><li><span class='code'>alert</span> - whether or not events generated from this rule should be promoted to alerts. A value of \"true\" will cause alerts to be generated. If not specified, the default is \"false\".</li><li><span class='code'>rate</span> <strong>(mandatory)</strong> - the maximum number of log messages matching this rule that may be sent as events every minute. If more than <tt>rate</tt> matching log messages are received in a single minute, the extra messages are ignored. If rate is less than 0, the number of messages per minute is unlimited.</li><li><span class='code'>periodminutes</span> - the number of minutes during which the publisher will only publish <tt>rate</tt> events or fewer. If not specified, the default is <strong>one minute</strong></li><li><span class='code'>threshold</span> - apply this rule only to messages with this log4j severity level or above. An example is \"WARN\" for warning level messages or higher.</li><li><span class='code'>content</span> - match only those messages whose contents match this regular expression.</li><li><span class='code'>exceptiontype</span> - match only those messages which are part of an exception message. The exception type must match this regular expression.</li></ul><br/><p>Example:<span class='code'>{\"alert\": false, \"rate\": 10, \"exceptiontype\": \"java.lang.StringIndexOutOfBoundsException\"}</span></p><p>This rule will send events to Cloudera Manager for every <span class='code'>StringIndexOutOfBoundsException</span>, up to a maximum of 10 every minute.</p>",
"display_name": "Rules to Extract Events from Log Files",
"name": "log_event_whitelist",
"value": "{\n \"version\": \"0\",\n \"rules\": [\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"FATAL\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Instead, use .*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Use .* instead\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.IOException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketClosedException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.EOFException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.nio.channels.CancelledKeyException\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 2, \"exceptiontype\": \".*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Unknown job [^ ]+ being deleted.*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Error executing shell command .+ No such process.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\".*attempt to override final parameter.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"[^ ]+ is a deprecated filesystem name. Use.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\"},\n {\"alert\": false, \"rate\": 1, \"threshold\":\"INFO\", \"content\":\"Triggering checkpoint.*\"}\n ]\n}\n"
},
{
"desc": "The base port where the DFS NameNode web UI listens. If the port number is 0, then the server starts on a free port. Combined with the NameNode's hostname to build its HTTP address.",
"display_name": "NameNode Web UI Port",
"name": "dfs_http_port",
"value": "50070"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>dfs_hosts_exclude.txt</strong> for this role only.",
"display_name": "NameNode Advanced Configuration Snippet (Safety Valve) for dfs_hosts_exclude.txt",
"name": "namenode_hosts_exclude_safety_valve",
"value": null
},
{
"desc": "Mountpoints that are mapped to this NameNode's Nameservice.",
"display_name": "Mountpoints",
"name": "nameservice_mountpoints",
"value": "/"
},
{
"desc": "The health test thresholds of the age of the HDFS namespace checkpoint. Specified as a percentage of the configured checkpoint interval.",
"display_name": "Filesystem Checkpoint Age Monitoring Thresholds",
"name": "namenode_checkpoint_age_thresholds",
"value": "{\"critical\":\"400.0\",\"warning\":\"200.0\"}"
},
{
"desc": "The base port where the secure NameNode web UI listens.",
"display_name": "Secure NameNode Web UI Port (SSL)",
"name": "dfs_https_port",
"value": "50470"
},
{
"desc": "Maximum size for the Java Process heap memory. Passed to Java -Xmx. Measured in bytes.",
"display_name": "Java Heap Size of Namenode in Bytes",
"name": "namenode_java_heapsize",
"value": "1073741824"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>log4j.properties</strong> for this role only.",
"display_name": "NameNode Logging Advanced Configuration Snippet (Safety Valve)",
"name": "log4j_safety_valve",
"value": null
},
{
"desc": "This determines the total amount of block transfers to begin in parallel at a DataNode for replication, when such a command list is being sent over a DataNode heartbeat by the NameNode. The actual number is obtained by multiplying this value by the total number of live nodes in the cluster. The result number is the number of blocks to transfer immediately, per DataNode heartbeat.",
"display_name": "Replication Work Multiplier Per Iteration",
"name": "dfs_namenode_replication_work_multiplier_per_iteration",
"value": "2"
},
{
"desc": "The health test thresholds of the number of transactions since the last HDFS namespace checkpoint. Specified as a percentage of the configured checkpointing transaction limit.",
"display_name": "Filesystem Checkpoint Transactions Monitoring Thresholds",
"name": "namenode_checkpoint_transactions_thresholds",
"value": "{\"critical\":\"400.0\",\"warning\":\"200.0\"}"
},
{
"desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.",
"display_name": "Automatically Restart Process",
"name": "process_auto_restart",
"value": "false"
},
{
"desc": "Determines extension of safemode in milliseconds after the threshold level is reached.",
"display_name": "Safemode Extension",
"name": "dfs_safemode_extension",
"value": "30000"
},
{
"desc": "Determines where on the local file system the NameNode should store the name table (fsimage). For redundancy, enter a comma-delimited list of directories to replicate the name table in all of the directories. Typical values are /data/N/dfs/nn where N=1..3.",
"display_name": "NameNode Data Directories",
"name": "dfs_name_dir_list",
"value": null
},
{
"desc": "Timeout in seconds for the Hue Thrift server running on the NameNode",
"display_name": "Hue Thrift Server Timeout",
"name": "dfs_thrift_timeout",
"value": "60"
},
{
"desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Soft Limit",
"name": "rm_memory_soft_limit",
"value": "-1"
},
{
"desc": "The health test thresholds for the weighted average time spent in Java garbage collection. Specified as a percentage of elapsed wall clock time.",
"display_name": "Garbage Collection Duration Thresholds",
"name": "namenode_gc_duration_thresholds",
"value": "{\"critical\":\"60.0\",\"warning\":\"30.0\"}"
},
{
"desc": "The health check thresholds of the NameNode's RPC latency.",
"display_name": "NameNode RPC Latency Thresholds",
"name": "namenode_rpc_latency_thresholds",
"value": "{\"critical\":\"5000.0\",\"warning\":\"1000.0\"}"
},
{
"desc": "The health test thresholds on the duration of the metrics request to the web server.",
"display_name": "Web Metric Collection Duration",
"name": "namenode_web_metric_collection_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"10000.0\"}"
},
{
"desc": "The number of server threads for the NameNode used for service calls. Only used when NameNode Service RPC Port is configured.",
"display_name": "NameNode Service Handler Count",
"name": "dfs_namenode_service_handler_count",
"value": "30"
},
{
"desc": "Full path to a custom topology script on the host file system. The topology script is used to determine the rack location of nodes. If left blank, a topology script will be provided that uses your hosts' rack information, visible in the \"Hosts\" page.",
"display_name": "Topology Script File Name",
"name": "topology_script_file_name",
"value": null
},
{
"desc": "Enables the health test that the Cloudera Manager Agent can successfully contact and gather metrics from the web server.",
"display_name": "Web Metric Collection",
"name": "namenode_web_metric_collection_enabled",
"value": "true"
},
{
"desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Hard Limit",
"name": "rm_memory_hard_limit",
"value": "-1"
},
{
"desc": "The time between two periodic file system checkpoints.",
"display_name": "Filesystem Checkpoint Period",
"name": "fs_checkpoint_period",
"value": "3600"
},
{
"desc": "Specifies the number of DataNodes that must be live before the name node exits safemode. Enter a value less than or equal to 0 to take the number of live DataNodes into account when deciding whether to remain in safemode during startup. Values greater than the number of DataNodes in the cluster will make safemode permanent.",
"display_name": "Safemode Minimum DataNodes",
"name": "dfs_safemode_min_datanodes",
"value": "0"
},
{
"desc": "The port where the NameNode runs the HDFS protocol. Combined with the NameNode's hostname to build its address.",
"display_name": "NameNode Port",
"name": "namenode_port",
"value": "8020"
},
{
"desc": "Enables the health test of the upgrade status of the NameNode.",
"display_name": "HDFS Upgrade Status Health Test",
"name": "namenode_upgrade_status_enabled",
"value": "true"
},
{
"desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup I/O Weight",
"name": "rm_io_weight",
"value": "500"
},
{
"desc": "Advanced Configuration Snippet (Safety Valve) for Hadoop Metrics2. Properties will be inserted into <strong>hadoop-metrics2.properties</strong>.",
"display_name": "Hadoop Metrics2 Advanced Configuration Snippet (Safety Valve)",
"name": "hadoop_metrics2_safety_valve",
"value": null
},
{
"desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Java Configuration Options for NameNode",
"name": "namenode_java_opts",
"value": "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:-CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+CMSParallelRemarkEnabled"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>hdfs-site.xml</strong> for this role only.",
"display_name": "NameNode Advanced Configuration Snippet (Safety Valve) for hdfs-site.xml",
"name": "namenode_config_safety_valve",
"value": null
},
{
"desc": "The period to review when computing the moving average of the NameNode's RPC latency.",
"display_name": "NameNode RPC Latency Monitoring Window",
"name": "namenode_rpc_latency_window",
"value": "5"
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.",
"display_name": "Log Directory Free Space Monitoring Absolute Thresholds",
"name": "log_directory_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "Optional port for the service-rpc address which can be used by HDFS daemons instead of sharing the RPC address used by the clients.",
"display_name": "NameNode Service RPC Port",
"name": "dfs_namenode_servicerpc_address",
"value": null
},
{
"desc": "When computing the overall NameNode health, consider the host's health.",
"display_name": "NameNode Host Health Test",
"name": "namenode_host_health_enabled",
"value": "true"
},
{
"desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.",
"display_name": "Maximum Process File Descriptors",
"name": "rlimit_fds",
"value": null
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Health Alerts for this Role",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "The maximum size, in megabytes, per log file for NameNode logs. Typically used by log4j.",
"display_name": "NameNode Max Log Size",
"name": "max_log_size",
"value": "200"
},
{
"desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.",
"display_name": "Kill When Out of Memory",
"name": "oom_sigkill_enabled",
"value": "true"
},
{
"desc": "Enables the health test that the NameNode's process state is consistent with the role configuration",
"display_name": "NameNode Process Health Test",
"name": "namenode_scm_health_enabled",
"value": "true"
},
{
"desc": "The health check thresholds for the number of out-of-sync JournalNodes for this NameNode.",
"display_name": "NameNode Out-Of-Sync JournalNodes Thresholds",
"name": "namenode_out_of_sync_journal_nodes_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "The period to review when computing unexpected exits.",
"display_name": "Unexpected Exits Monitoring Period",
"name": "unexpected_exits_window",
"value": "5"
},
{
"desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.",
"display_name": "Unexpected Exits Thresholds",
"name": "unexpected_exits_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>dfs_hosts_allow.txt</strong> for this role only.",
"display_name": "NameNode Advanced Configuration Snippet (Safety Valve) for dfs_hosts_allow.txt",
"name": "namenode_hosts_allow_safety_valve",
"value": null
},
{
"desc": "Number of minutes between trash checkpoints. Also controls the number of minutes after which a trash checkpoint directory is deleted. To disable the trash feature, enter 0.",
"display_name": "Filesystem Trash Interval",
"name": "fs_trash_interval",
"value": "1440"
},
{
"desc": "The access time for HDFS file is precise upto this value. Setting the value of 0 disables access times for HDFS. When using the NFS Gateway role, make sure this property is enabled.",
"display_name": "Access Time Precision",
"name": "dfs_access_time_precision",
"value": "3600000"
},
{
"desc": "<p>The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><span class='code'>triggerName</span> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific role. </li><li><span class='code'>triggerExpression</span> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. <li><span class='code'>streamThreshold</span> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. <li><span class='code'>enabled</span> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</p><p>For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:red\",\n \"streamThreshold\": 0}, \"enabled\": \"true\"]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Role Triggers",
"name": "role_triggers",
"value": "[]"
},
{
"desc": "Minimum number of running threads for the Hue Thrift server running on the NameNode",
"display_name": "Hue Thrift Server Min Threadcount",
"name": "dfs_thrift_threads_min",
"value": "10"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "If enabled, the NameNode binds to the wildcard address (\"0.0.0.0\") on all of its ports.",
"display_name": "Bind NameNode to Wildcard Address",
"name": "namenode_bind_wildcard",
"value": "false"
},
{
"desc": "Comma-separated list of NameNode plug-ins to be activated. If one plug-in cannot be loaded, all the plug-ins are ignored.",
"display_name": "NameNode Plugins",
"name": "dfs_namenode_plugins_list",
"value": ""
},
{
"desc": "Directory on a shared storage device, such as a Quorum-based Storage URI or a local directory that is an NFS mount from a NAS, to store the NameNode edits. The value of this configuration is automatically generated to be the Quourm Journal URI if there are JournalNodes and this NameNode is Highly Available.",
"display_name": "Shared Edits Directory",
"name": "dfs_namenode_shared_edits_dir",
"value": null
},
{
"desc": "This determines the percentage amount of block invalidations (deletes) to do over a single DataNode heartbeat deletion command. The final deletion count is determined by applying this percentage to the number of live nodes in the system. The resultant number is the number of blocks from the deletion list chosen for proper invalidation over a single heartbeat of a single DataNode.",
"display_name": "Invalidate Work Percentage Per Iteration",
"name": "dfs_namenode_invalidate_work_pct_per_iteration",
"value": "0.32"
},
{
"desc": "Enable Automatic Failover to maintain High Availability. Requires a ZooKeeper service and a High Availability NameNode partner.",
"display_name": "Enable Automatic Failover",
"name": "autofailover_enabled",
"value": "false"
},
{
"desc": "The maximum number of rolled log files to keep for NameNode logs. Typically used by log4j.",
"display_name": "NameNode Maximum Log File Backups",
"name": "max_log_backup_index",
"value": "10"
},
{
"desc": "Maximum number of running threads for the Hue Thrift server running on the NameNode",
"display_name": "Hue Thrift Server Max Threadcount",
"name": "dfs_thrift_threads_max",
"value": "20"
},
{
"desc": "Directory where NameNode will place its log files.",
"display_name": "NameNode Log Directory",
"name": "namenode_log_dir",
"value": "/var/log/hadoop-hdfs"
},
{
"desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup CPU Shares",
"name": "rm_cpu_shares",
"value": "1024"
},
{
"desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.",
"display_name": "File Descriptor Monitoring Thresholds",
"name": "namenode_fd_thresholds",
"value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}"
},
{
"desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.",
"display_name": "Heap Dump Directory",
"name": "oom_heap_dump_dir",
"value": "/tmp"
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystems that contain this role's data directories. Specified as a percentage of the capacity on the filesystem. This setting is not used if a Data Directories Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Data Directories Free Space Monitoring Percentage Thresholds",
"name": "namenode_data_directories_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "Nameservice of this NameNode. The Nameservice represents the interface to this NameNode and its High Availability partner. The Nameservice also represents the namespace associated with a federated NameNode.",
"display_name": "NameNode Nameservice",
"name": "dfs_federation_namenode_nameservice",
"value": null
},
{
"desc": "The number of transactions after which the NameNode or SecondaryNameNode will create a checkpoint of the namespace, regardless of whether the checkpoint period has expired.",
"display_name": "Filesystem Checkpoint Transaction Threshold",
"name": "fs_checkpoint_txns",
"value": "1000000"
},
{
"desc": "The minimum log level for NameNode logs",
"display_name": "NameNode Logging Threshold",
"name": "log_threshold",
"value": "INFO"
},
{
"desc": "Specifies the percentage of blocks that should satisfy the minimal replication requirement defined by dfs.replication.min. Enter a value less than or equal to 0 to wait for any particular percentage of blocks before exiting safemode. Values greater than 1 will make safemode permanent.",
"display_name": "Safemode Threshold Percentage",
"name": "dfs_safemode_threshold_pct",
"value": "0.999"
},
{
"desc": "Directories on the local file system to store the NameNode edits. If not set, the edits are stored in the NameNode's Data Directories. The value of this configuration is automatically generated to be the Quorum-based Storage URI if there are JournalNodes and this NameNode is not Highly Available.",
"display_name": "NameNode Edits Directories",
"name": "dfs_namenode_edits_dir",
"value": null
},
{
"desc": "If set to false and if one of the replicas of the NameNode storage fails, such as temporarily failure of NFS, this directory is not used until the NameNode restarts. If enabled, failed storage is re-checked on every checkpoint and, if it becomes valid, the NameNode will try to restore the edits and fsimage.",
"display_name": "Restore NameNode Directories at Checkpoint Time",
"name": "dfs_name_dir_restore",
"value": "false"
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystems that contain this role's data directories.",
"display_name": "Data Directories Free Space Monitoring Absolute Thresholds",
"name": "namenode_data_directories_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
}
]

View File

@ -0,0 +1,200 @@
[
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Log Directory Free Space Monitoring Percentage Thresholds",
"name": "log_directory_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "<p>This file contains the rules which govern how log messages are turned into events by the custom log4j appender that this role loads. It is in JSON format, and is composed of a list of rules. Every log message is evaluated against each of these rules in turn to decide whether or not to send an event for that message.</p><p>Each rule has some or all of the following fields:</p><ul><li><span class='code'>alert</span> - whether or not events generated from this rule should be promoted to alerts. A value of \"true\" will cause alerts to be generated. If not specified, the default is \"false\".</li><li><span class='code'>rate</span> <strong>(mandatory)</strong> - the maximum number of log messages matching this rule that may be sent as events every minute. If more than <tt>rate</tt> matching log messages are received in a single minute, the extra messages are ignored. If rate is less than 0, the number of messages per minute is unlimited.</li><li><span class='code'>periodminutes</span> - the number of minutes during which the publisher will only publish <tt>rate</tt> events or fewer. If not specified, the default is <strong>one minute</strong></li><li><span class='code'>threshold</span> - apply this rule only to messages with this log4j severity level or above. An example is \"WARN\" for warning level messages or higher.</li><li><span class='code'>content</span> - match only those messages whose contents match this regular expression.</li><li><span class='code'>exceptiontype</span> - match only those messages which are part of an exception message. The exception type must match this regular expression.</li></ul><br/><p>Example:<span class='code'>{\"alert\": false, \"rate\": 10, \"exceptiontype\": \"java.lang.StringIndexOutOfBoundsException\"}</span></p><p>This rule will send events to Cloudera Manager for every <span class='code'>StringIndexOutOfBoundsException</span>, up to a maximum of 10 every minute.</p>",
"display_name": "Rules to Extract Events from Log Files",
"name": "log_event_whitelist",
"value": "{\n \"version\": \"0\",\n \"rules\": [\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"FATAL\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 2, \"exceptiontype\": \".*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\"}\n ]\n}"
},
{
"desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.",
"display_name": "File Descriptor Monitoring Thresholds",
"name": "nfsgateway_fd_thresholds",
"value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}"
},
{
"desc": "NFS clients often reorder writes. As a result, sequential writes can arrive at the NFS Gateway in random order. This directory is used to temporarily save out-of-order writes before writing to HDFS. For each file, the out-of-order writes are dumped after they are accumulated to exceed certain threshold (e.g., 1MB) in memory. Please make sure this directory has enough space. For example, if the application uploads 10 files with each having 100MB, it is recommended that this directory have roughly 1GB of space in case write reorder happens (in the worst case) to every file.",
"display_name": "Temporary Dump Directory",
"name": "dfs_nfs3_dump_dir",
"value": "/tmp/.hdfs-nfs"
},
{
"desc": "Enables the health test that the NFS Gateway's process state is consistent with the role configuration",
"display_name": "NFS Gateway Process Health Test",
"name": "nfsgateway_scm_health_enabled",
"value": "true"
},
{
"desc": "Maximum size for the Java Process heap memory. Passed to Java -Xmx. Measured in bytes.",
"display_name": "Java Heap Size of NFS Gateway in Bytes",
"name": "nfsgateway_java_heapsize",
"value": "268435456"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>hdfs-site.xml</strong> for this role only.",
"display_name": "NFS Gateway Advanced Configuration Snippet (Safety Valve) for hdfs-site.xml",
"name": "nfsgateway_config_safety_valve",
"value": null
},
{
"desc": "The port number of the system portmap or rpcbind service. This configuration is used by Cloudera Manager to verify if the system portmap or rpcbind service is running before starting NFS Gateway role. Cloudera Manager does not manage the system portmap or rpcbind service.",
"display_name": "Portmap (or Rpcbind) Port",
"name": "nfs3_portmap_port",
"value": "111"
},
{
"desc": "The NFS Gateway server port.",
"display_name": "NFS Gateway Server Port",
"name": "nfs3_server_port",
"value": "2049"
},
{
"desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Java Configuration Options for NFS Gateway",
"name": "nfsgateway_java_opts",
"value": ""
},
{
"desc": "The maximum number of rolled log files to keep for NFS Gateway logs. Typically used by log4j.",
"display_name": "NFS Gateway Maximum Log File Backups",
"name": "max_log_backup_index",
"value": "10"
},
{
"desc": "<p>The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><span class='code'>triggerName</span> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific role. </li><li><span class='code'>triggerExpression</span> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. <li><span class='code'>streamThreshold</span> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. <li><span class='code'>enabled</span> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</p><p>For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:red\",\n \"streamThreshold\": 0}, \"enabled\": \"true\"]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Role Triggers",
"name": "role_triggers",
"value": "[]"
},
{
"desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Soft Limit",
"name": "rm_memory_soft_limit",
"value": "-1"
},
{
"desc": "When computing the overall NFS Gateway health, consider the host's health.",
"display_name": "NFS Gateway Host Health Test",
"name": "nfsgateway_host_health_enabled",
"value": "true"
},
{
"desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Hard Limit",
"name": "rm_memory_hard_limit",
"value": "-1"
},
{
"desc": "By default, NFS Gateway exported directories can be mounted by any client. For better access control, update this property with a list of host names and access privileges separated by whitespace characters. Host name format can be a single host, a Java regular expression, or an IPv4 address. The access privilege uses <strong>rw</strong> to specify readwrite and <strong>ro</strong> to specify readonly access. If the access privilege is not provided, the default is read-only. Examples of host name format and access privilege: \"192.168.0.0/22 rw\", \"host.*.example.com\", \"host1.test.org ro\".",
"display_name": "Allowed Hosts and Privileges",
"name": "dfs_nfs_exports_allowed_hosts",
"value": "* rw"
},
{
"desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.",
"display_name": "Automatically Restart Process",
"name": "process_auto_restart",
"value": "false"
},
{
"desc": "The port number of the mount daemon implemented inside the NFS Gateway server role.",
"display_name": "NFS Gateway MountD Port",
"name": "nfs3_mountd_port",
"value": "4242"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>log4j.properties</strong> for this role only.",
"display_name": "NFS Gateway Logging Advanced Configuration Snippet (Safety Valve)",
"name": "log4j_safety_valve",
"value": null
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.",
"display_name": "Log Directory Free Space Monitoring Absolute Thresholds",
"name": "log_directory_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.",
"display_name": "Maximum Process File Descriptors",
"name": "rlimit_fds",
"value": null
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Health Alerts for this Role",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "The maximum size, in megabytes, per log file for NFS Gateway logs. Typically used by log4j.",
"display_name": "NFS Gateway Max Log Size",
"name": "max_log_size",
"value": "200"
},
{
"desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.",
"display_name": "Kill When Out of Memory",
"name": "oom_sigkill_enabled",
"value": "true"
},
{
"desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.",
"display_name": "Dump Heap When Out of Memory",
"name": "oom_heap_dump_enabled",
"value": "false"
},
{
"desc": "The period to review when computing unexpected exits.",
"display_name": "Unexpected Exits Monitoring Period",
"name": "unexpected_exits_window",
"value": "5"
},
{
"desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.",
"display_name": "Unexpected Exits Thresholds",
"name": "unexpected_exits_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup I/O Weight",
"name": "rm_io_weight",
"value": "500"
},
{
"desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup CPU Shares",
"name": "rm_cpu_shares",
"value": "1024"
},
{
"desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.",
"display_name": "Heap Dump Directory",
"name": "oom_heap_dump_dir",
"value": "/tmp"
},
{
"desc": "The minimum log level for NFS Gateway logs",
"display_name": "NFS Gateway Logging Threshold",
"name": "log_threshold",
"value": "INFO"
},
{
"desc": "Directory where NFS Gateway will place its log files.",
"display_name": "NFS Gateway Log Directory",
"name": "nfsgateway_log_dir",
"value": "/var/log/hadoop-hdfs"
}
]

View File

@ -0,0 +1,254 @@
[
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Log Directory Free Space Monitoring Percentage Thresholds",
"name": "log_directory_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "<p>This file contains the rules which govern how log messages are turned into events by the custom log4j appender that this role loads. It is in JSON format, and is composed of a list of rules. Every log message is evaluated against each of these rules in turn to decide whether or not to send an event for that message.</p><p>Each rule has some or all of the following fields:</p><ul><li><span class='code'>alert</span> - whether or not events generated from this rule should be promoted to alerts. A value of \"true\" will cause alerts to be generated. If not specified, the default is \"false\".</li><li><span class='code'>rate</span> <strong>(mandatory)</strong> - the maximum number of log messages matching this rule that may be sent as events every minute. If more than <tt>rate</tt> matching log messages are received in a single minute, the extra messages are ignored. If rate is less than 0, the number of messages per minute is unlimited.</li><li><span class='code'>periodminutes</span> - the number of minutes during which the publisher will only publish <tt>rate</tt> events or fewer. If not specified, the default is <strong>one minute</strong></li><li><span class='code'>threshold</span> - apply this rule only to messages with this log4j severity level or above. An example is \"WARN\" for warning level messages or higher.</li><li><span class='code'>content</span> - match only those messages whose contents match this regular expression.</li><li><span class='code'>exceptiontype</span> - match only those messages which are part of an exception message. The exception type must match this regular expression.</li></ul><br/><p>Example:<span class='code'>{\"alert\": false, \"rate\": 10, \"exceptiontype\": \"java.lang.StringIndexOutOfBoundsException\"}</span></p><p>This rule will send events to Cloudera Manager for every <span class='code'>StringIndexOutOfBoundsException</span>, up to a maximum of 10 every minute.</p>",
"display_name": "Rules to Extract Events from Log Files",
"name": "log_event_whitelist",
"value": "{\n \"version\": \"0\",\n \"rules\": [\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"FATAL\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Instead, use .*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Use .* instead\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.IOException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketClosedException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.EOFException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.nio.channels.CancelledKeyException\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 2, \"exceptiontype\": \".*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Unknown job [^ ]+ being deleted.*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Error executing shell command .+ No such process.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\".*attempt to override final parameter.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"[^ ]+ is a deprecated filesystem name. Use.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\"},\n {\"alert\": false, \"rate\": 1, \"threshold\":\"INFO\", \"content\":\"Triggering checkpoint.*\"}\n ]\n}\n"
},
{
"desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Java Configuration Options for Secondary NameNode",
"name": "secondarynamenode_java_opts",
"value": "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:-CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+CMSParallelRemarkEnabled"
},
{
"desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.",
"display_name": "Dump Heap When Out of Memory",
"name": "oom_heap_dump_enabled",
"value": "false"
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystems that contain this role's checkpoint directories.",
"display_name": "Checkpoint Directories Free Space Monitoring Absolute Thresholds",
"name": "secondarynamenode_checkpoint_directories_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "Maximum size for the Java Process heap memory. Passed to Java -Xmx. Measured in bytes.",
"display_name": "Java Heap Size of Secondary namenode in Bytes",
"name": "secondary_namenode_java_heapsize",
"value": "1073741824"
},
{
"desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.",
"display_name": "Automatically Restart Process",
"name": "process_auto_restart",
"value": "false"
},
{
"desc": "The maximum number of rolled log files to keep for SecondaryNameNode logs. Typically used by log4j.",
"display_name": "SecondaryNameNode Maximum Log File Backups",
"name": "max_log_backup_index",
"value": "10"
},
{
"desc": "<p>The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><span class='code'>triggerName</span> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific role. </li><li><span class='code'>triggerExpression</span> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. <li><span class='code'>streamThreshold</span> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. <li><span class='code'>enabled</span> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</p><p>For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:red\",\n \"streamThreshold\": 0}, \"enabled\": \"true\"]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Role Triggers",
"name": "role_triggers",
"value": "[]"
},
{
"desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Soft Limit",
"name": "rm_memory_soft_limit",
"value": "-1"
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.",
"display_name": "Log Directory Free Space Monitoring Absolute Thresholds",
"name": "log_directory_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>hdfs-site.xml</strong> for this role only.",
"display_name": "SecondaryNameNode Advanced Configuration Snippet (Safety Valve) for hdfs-site.xml",
"name": "secondarynamenode_config_safety_valve",
"value": null
},
{
"desc": "Enables the health test that the SecondaryNameNode's process state is consistent with the role configuration",
"display_name": "SecondaryNameNode Process Health Test",
"name": "secondarynamenode_scm_health_enabled",
"value": "true"
},
{
"desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Hard Limit",
"name": "rm_memory_hard_limit",
"value": "-1"
},
{
"desc": "The time between two periodic file system checkpoints.",
"display_name": "Filesystem Checkpoint Period",
"name": "fs_checkpoint_period",
"value": "3600"
},
{
"desc": "The period to review when computing unexpected exits.",
"display_name": "Unexpected Exits Monitoring Period",
"name": "unexpected_exits_window",
"value": "5"
},
{
"desc": "Advanced Configuration Snippet (Safety Valve) for Hadoop Metrics2. Properties will be inserted into <strong>hadoop-metrics2.properties</strong>.",
"display_name": "Hadoop Metrics2 Advanced Configuration Snippet (Safety Valve)",
"name": "hadoop_metrics2_safety_valve",
"value": null
},
{
"desc": "If enabled, the SecondaryNameNode binds to the wildcard address (\"0.0.0.0\") on all of its ports.",
"display_name": "Bind SecondaryNameNode to Wildcard Address",
"name": "secondary_namenode_bind_wildcard",
"value": "false"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>log4j.properties</strong> for this role only.",
"display_name": "SecondaryNameNode Logging Advanced Configuration Snippet (Safety Valve)",
"name": "log4j_safety_valve",
"value": null
},
{
"desc": "The number of transactions after which the NameNode or SecondaryNameNode will create a checkpoint of the namespace, regardless of whether the checkpoint period has expired.",
"display_name": "Filesystem Checkpoint Transaction Threshold",
"name": "fs_checkpoint_txns",
"value": "1000000"
},
{
"desc": "Enables the health test that the Cloudera Manager Agent can successfully contact and gather metrics from the web server.",
"display_name": "Web Metric Collection",
"name": "secondarynamenode_web_metric_collection_enabled",
"value": "true"
},
{
"desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.",
"display_name": "Maximum Process File Descriptors",
"name": "rlimit_fds",
"value": null
},
{
"desc": "The health test thresholds on the duration of the metrics request to the web server.",
"display_name": "Web Metric Collection Duration",
"name": "secondarynamenode_web_metric_collection_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"10000.0\"}"
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Health Alerts for this Role",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystems that contain this role's checkpoint directories. Specified as a percentage of the capacity on the filesystem. This setting is not used if a Checkpoint Directories Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Checkpoint Directories Free Space Monitoring Percentage Thresholds",
"name": "secondarynamenode_checkpoint_directories_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "When computing the overall SecondaryNameNode health, consider the host's health.",
"display_name": "SecondaryNameNode Host Health Test",
"name": "secondarynamenode_host_health_enabled",
"value": "true"
},
{
"desc": "The maximum size, in megabytes, per log file for SecondaryNameNode logs. Typically used by log4j.",
"display_name": "SecondaryNameNode Max Log Size",
"name": "max_log_size",
"value": "200"
},
{
"desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.",
"display_name": "Kill When Out of Memory",
"name": "oom_sigkill_enabled",
"value": "true"
},
{
"desc": "The health test thresholds for the weighted average time spent in Java garbage collection. Specified as a percentage of elapsed wall clock time.",
"display_name": "Garbage Collection Duration Thresholds",
"name": "secondarynamenode_gc_duration_thresholds",
"value": "{\"critical\":\"60.0\",\"warning\":\"30.0\"}"
},
{
"desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.",
"display_name": "Unexpected Exits Thresholds",
"name": "unexpected_exits_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.",
"display_name": "File Descriptor Monitoring Thresholds",
"name": "secondarynamenode_fd_thresholds",
"value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}"
},
{
"desc": "The period to review when computing the moving average of garbage collection time.",
"display_name": "Garbage Collection Duration Monitoring Period",
"name": "secondarynamenode_gc_duration_window",
"value": "5"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "Determines where on the local file system the DFS SecondaryNameNode should store the temporary images to merge. For redundancy, enter a comma-delimited list of directories to replicate the image in all of the directories. Typical values are /data/N/dfs/snn for N = 1, 2, 3...",
"display_name": "HDFS Checkpoint Directory",
"name": "fs_checkpoint_dir_list",
"value": null
},
{
"desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup I/O Weight",
"name": "rm_io_weight",
"value": "500"
},
{
"desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup CPU Shares",
"name": "rm_cpu_shares",
"value": "1024"
},
{
"desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.",
"display_name": "Heap Dump Directory",
"name": "oom_heap_dump_dir",
"value": "/tmp"
},
{
"desc": "Nameservice of this SecondaryNameNode",
"display_name": "SecondaryNameNode Nameservice",
"name": "dfs_secondarynamenode_nameservice",
"value": null
},
{
"desc": "The minimum log level for SecondaryNameNode logs",
"display_name": "SecondaryNameNode Logging Threshold",
"name": "log_threshold",
"value": "INFO"
},
{
"desc": "The base port where the secure SecondaryNameNode web UI listens.",
"display_name": "Secure SecondaryNameNode Web UI Port (SSL)",
"name": "dfs_secondary_https_port",
"value": "50495"
},
{
"desc": "Directory where SecondaryNameNode will place its log files.",
"display_name": "SecondaryNameNode Log Directory",
"name": "secondarynamenode_log_dir",
"value": "/var/log/hadoop-hdfs"
},
{
"desc": "The SecondaryNameNode HTTP port. If the port is 0, then the server starts on a free port. Combined with the SecondaryNameNode's hostname to build its HTTP address.",
"display_name": "SecondaryNameNode Web UI Port",
"name": "dfs_secondary_http_port",
"value": "50090"
}
]

View File

@ -0,0 +1,608 @@
[
{
"desc": "Timeout in milliseconds for the parallel RPCs made in DistributedFileSystem#getFileBlockStorageLocations(). This value is only emitted for Impala.",
"display_name": "HDFS File Block Storage Location Timeout",
"name": "dfs_client_file_block_storage_locations_timeout",
"value": "10000"
},
{
    "desc": "The domain to use for the HTTP cookie that stores the authentication token. In order for authentication to work correctly across all Hadoop nodes' web-consoles the domain must be correctly set. <b>Important:</b> when using IP addresses, browsers ignore cookies with domain settings. For this setting to work properly all nodes in the cluster must be configured to generate URLs with hostname.domain names on it.",
"display_name": "Hadoop HTTP Authentication Cookie Domain",
"name": "hadoop_http_auth_cookie_domain",
"value": ""
},
{
"desc": "The user that this service's processes should run as (except the HttpFS server, which has its own user)",
"display_name": "System User",
"name": "process_username",
"value": "hdfs"
},
{
"desc": "<p>Event filters are defined in a JSON object like the following:</p>\n\n<pre>\n{\n \"defaultAction\" : (\"accept\", \"discard\"),\n \"rules\" : [\n {\n \"action\" : (\"accept\", \"discard\"),\n \"fields\" : [\n {\n \"name\" : \"fieldName\",\n \"match\" : \"regex\"\n }\n ]\n }\n ]\n}\n</pre>\n\n<p>\nA filter has a default action and a list of rules, in order of precedence.\nEach rule defines an action, and a list of fields to match against the\naudit event.\n</p>\n\n<p>\nA rule is \"accepted\" if all the listed field entries match the audit\nevent. At that point, the action declared by the rule is taken.\n</p>\n\n<p>\nIf no rules match the event, the default action is taken. Actions\ndefault to \"accept\" if not defined in the JSON object.\n</p>\n\n<p>\nThe following is the list of fields that can be filtered for HDFS events:\n</p>\n\n<ul>\n <li>username: the user performing the action.</li>\n <li>ipAddress: the IP from where the request originated.</li>\n <li>command: the HDFS operation being performed.</li>\n <li>src: the source path for the operation.</li>\n <li>dest: the destination path for the operation.</li>\n <li>permissions: the permissions associated with the operation.</li>\n</ul>\n",
"display_name": "Event Filter",
"name": "navigator_audit_event_filter",
"value": "{\n \"comment\" : [\n \"Default filter for HDFS services.\",\n \"Discards events generated by the internal Cloudera and/or HDFS users\",\n \"(hdfs, hbase, mapred and dr.who), and events that affect files in \",\n \"/tmp directory.\"\n ],\n \"defaultAction\" : \"accept\",\n \"rules\" : [\n {\n \"action\" : \"discard\",\n \"fields\" : [\n { \"name\" : \"username\", \"match\" : \"(?:cloudera-scm|hbase|hdfs|mapred|hive|dr.who)(?:/.+)?\" }\n ]\n },\n {\n \"action\" : \"discard\",\n \"fields\" : [\n { \"name\" : \"src\", \"match\" : \"/tmp(?:/.*)?\" }\n ]\n }\n ]\n}\n"
},
{
"desc": "The password for the SSL keystore.",
"display_name": "Hadoop User Group Mapping LDAP SSL Keystore Password",
"name": "hadoop_group_mapping_ldap_keystore_passwd",
"value": ""
},
{
"desc": "Comma-delimited list of hosts where you want to allow the Hue user to impersonate other users. The default '*' allows all hosts. To disable entirely, use a string that doesn't correspond to a host name, such as '_no_host'.",
"display_name": "Hue Proxy User Hosts",
"name": "hue_proxy_user_hosts_list",
"value": "*"
},
{
"desc": "The service monitor will use this directory to create files to test if the hdfs service is healthy. The directory and files are created with permissions specified by 'HDFS Health Canary Directory Permissions'",
"display_name": "HDFS Health Canary Directory",
"name": "firehose_hdfs_canary_directory",
"value": "/tmp/.cloudera_health_monitoring_canary_files"
},
{
"desc": "Path to the directory where audit logs will be written. The directory will be created if it doesn't exist.",
"display_name": "Audit Log Directory",
"name": "audit_event_log_dir",
"value": "/var/log/hadoop-hdfs/audit"
},
{
"desc": "Class for user to group mapping (get groups for a given user).",
"display_name": "Hadoop User Group Mapping Implementation",
"name": "hadoop_security_group_mapping",
"value": "org.apache.hadoop.security.ShellBasedUnixGroupsMapping"
},
{
"desc": "Allows the oozie superuser to impersonate any members of a comma-delimited list of groups. The default '*' allows all groups. To disable entirely, use a string that doesn't correspond to a group name, such as '_no_group_'.",
"display_name": "Oozie Proxy User Groups",
"name": "oozie_proxy_user_groups_list",
"value": "*"
},
{
"desc": "Comma-separated list of compression codecs that can be used in job or map compression.",
"display_name": "Compression Codecs",
"name": "io_compression_codecs",
"value": "org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec"
},
{
    "desc": "Comma-separated list of users authorized to use Hadoop. This is emitted only if authorization is enabled.",
"display_name": "Authorized Users",
"name": "hadoop_authorized_users",
"value": "*"
},
{
"desc": "Enable HDFS short circuit read. This allows a client co-located with the DataNode to read HDFS file blocks directly. This gives a performance boost to distributed clients that are aware of locality.",
"display_name": "Enable HDFS Short Circuit Read",
"name": "dfs_datanode_read_shortcircuit",
"value": "true"
},
{
"desc": "The distinguished name of the user to bind as when connecting to the LDAP server. This may be left blank if the LDAP server supports anonymous binds.",
"display_name": "Hadoop User Group Mapping LDAP Bind User",
"name": "hadoop_group_mapping_ldap_bind_user",
"value": ""
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this service reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Service Level Health Alerts",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "The password of the bind user.",
"display_name": "Hadoop User Group Mapping LDAP Bind User Password",
"name": "hadoop_group_mapping_ldap_bind_passwd",
"value": ""
},
{
"desc": "Action to take when the audit event queue is full. Drop the event or shutdown the affected process.",
"display_name": "Queue Policy",
"name": "navigator_audit_queue_policy",
"value": "DROP"
},
{
"desc": "When set, each role will identify important log events and forward them to Cloudera Manager.",
"display_name": "Enable Log Event Capture",
"name": "catch_events",
"value": "true"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>core-site.xml</strong>. Applies to all roles and client configurations in this HDFS service as well as all its dependent services. Any configs added here will be overridden by their default values in HDFS (which can be found in hdfs-default.xml).",
"display_name": "Cluster-wide Advanced Configuration Snippet (Safety Valve) for core-site.xml",
"name": "core_site_safety_valve",
"value": null
},
{
"desc": "The default block size in bytes for new HDFS files. Note that this value is also used as the HBase Region Server HLog block size.",
"display_name": "HDFS Block Size",
"name": "dfs_block_size",
"value": "134217728"
},
{
"desc": "Enable WebHDFS interface",
"display_name": "Enable WebHDFS",
"name": "dfs_webhdfs_enabled",
"value": "true"
},
{
"desc": "The name of the group of superusers.",
"display_name": "Superuser Group",
"name": "dfs_permissions_supergroup",
"value": "supergroup"
},
{
"desc": "Typically, HDFS clients and servers communicate by opening sockets via an IP address. In certain networking configurations, it is preferable to open sockets after doing a DNS lookup on the hostname. Enable this property to open sockets after doing a DNS lookup on the hostname. This property is supported in CDH3u4 or later deployments.",
"display_name": "Use DataNode Hostname",
"name": "dfs_client_use_datanode_hostname",
"value": "false"
},
{
"desc": "Enter a FailoverProxyProvider implementation to configure two URIs to connect to during fail-over. The first configured address is tried first, and on a fail-over event the other address is tried.",
"display_name": "FailoverProxyProvider Class",
"name": "dfs_ha_proxy_provider",
"value": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
},
{
"desc": "The search base for the LDAP connection. This is a distinguished name, and will typically be the root of the LDAP directory.",
"display_name": "Hadoop User Group Mapping Search Base",
"name": "hadoop_group_mapping_ldap_base",
"value": ""
},
{
"desc": "If false, permission checking is turned off for files in HDFS.",
"display_name": "Check HDFS Permissions",
"name": "dfs_permissions",
"value": "true"
},
{
"desc": "Comma-delimited list of groups that you want to allow the Hue user to impersonate. The default '*' allows all groups. To disable entirely, use a string that doesn't correspond to a group name, such as '_no_group_'.",
"display_name": "Hue Proxy User Groups",
"name": "hue_proxy_user_groups_list",
"value": "*"
},
{
    "desc": "Comma-separated list of groups authorized to use Hadoop. This is emitted only if authorization is enabled.",
"display_name": "Authorized Groups",
"name": "hadoop_authorized_groups",
"value": ""
},
{
"desc": "Comma-delimited list of hosts where you want to allow the oozie user to impersonate other users. The default '*' allows all hosts. To disable entirely, use a string that doesn't correspond to a host name, such as '_no_host'.",
"display_name": "Oozie Proxy User Hosts",
"name": "oozie_proxy_user_hosts_list",
"value": "*"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "Comma-delimited list of groups that you want to allow the mapred user to impersonate. The default '*' allows all groups. To disable entirely, use a string that doesn't correspond to a group name, such as '_no_group_'.",
"display_name": "Mapred Proxy User Groups",
"name": "mapred_proxy_user_groups_list",
"value": "*"
},
{
"desc": "For advanced use only, key-value pairs (one on each line) to be inserted into a role's environment. Applies to configurations of all roles in this service except client configuration.",
"display_name": "HDFS Service Environment Advanced Configuration Snippet (Safety Valve)",
"name": "hdfs_service_env_safety_valve",
"value": null
},
{
"desc": "Additional mapping rules that will be inserted before rules generated from the list of trusted realms and before the default rule. After changing this value and restarting the service, any services depending on this one must be restarted as well. The hadoop.security.auth_to_local property is configured using this information.",
"display_name": "Additional Rules to Map Kerberos Principals to Short Names",
"name": "extra_auth_to_local_rules",
"value": null
},
{
"desc": "Maximum size of audit log file in MB before it is rolled over.",
"display_name": "Maximum Audit Log File Size",
"name": "navigator_audit_log_max_file_size",
"value": "100"
},
{
"desc": "Enables authentication for hadoop HTTP web-consoles for all roles of this service. <b>Note:</b> This is effective only if security is enabled for the HDFS service.",
"display_name": "Enable Authentication for HTTP Web-Consoles",
"name": "hadoop_secure_web_ui",
"value": "false"
},
{
"desc": "Quality of protection for secured RPC connections between NameNode and HDFS clients. For effective RPC protection, enable Kerberos authentication.",
"display_name": "Hadoop RPC Protection",
"name": "hadoop_rpc_protection",
"value": "authentication"
},
{
"desc": "Comma-delimited list of groups that you want to allow the Hive user to impersonate. The default '*' allows all groups. To disable entirely, use a string that doesn't correspond to a group name, such as '_no_group_'.",
"display_name": "Hive Proxy User Groups",
"name": "hive_proxy_user_groups_list",
"value": "*"
},
{
"desc": "Comma-separated list of users authorized to perform admin operations on Hadoop. This is emitted only if authorization is enabled.",
"display_name": "Authorized Admin Users",
"name": "hadoop_authorized_admin_users",
"value": "*"
},
{
"desc": "The health check thresholds of the number of missing blocks. Specified as a percentage of the total number of blocks.",
"display_name": "Missing Block Monitoring Thresholds",
"name": "hdfs_missing_blocks_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "The amount of time after NameNode(s) start that the lack of an active NameNode will be tolerated. This is intended to allow either the auto-failover daemon to make a NameNode active, or a specifically issued failover command to take effect. This is an advanced option that does not often need to be changed.",
"display_name": "NameNode Activation Startup Tolerance",
"name": "hdfs_namenode_activation_startup_tolerance",
"value": "180"
},
{
"desc": "Comma-delimited list of groups that you want to allow the HttpFS user to impersonate. The default '*' allows all groups. To disable entirely, use a string that doesn't correspond to a group name, such as '_no_group_'.",
"display_name": "HttpFS Proxy User Groups",
"name": "httpfs_proxy_user_groups_list",
"value": "*"
},
{
"desc": "Allows the flume user to impersonate any members of a comma-delimited list of groups. The default '*' allows all groups. To disable entirely, use a string that doesn't correspond to a group name, such as '_no_group_'.",
"display_name": "Flume Proxy User Groups",
"name": "flume_proxy_user_groups_list",
"value": "*"
},
{
"desc": "Comma-delimited list of hosts where you want to allow the mapred user to impersonate other users. The default '*' allows all hosts. To disable entirely, use a string that doesn't correspond to a host name, such as '_no_host'.",
"display_name": "Mapred Proxy User Hosts",
"name": "mapred_proxy_user_hosts_list",
"value": "*"
},
{
"desc": "For advanced use only, a list of configuration properties that will be used by the Service Monitor instead of the current client configuration for the service.",
"display_name": "Service Monitor Client Config Overrides",
"name": "smon_client_config_overrides",
"value": "<property><name>dfs.socket.timeout</name><value>3000</value></property><property><name>dfs.datanode.socket.write.timeout</name><value>3000</value></property><property><name>ipc.client.connect.max.retries</name><value>1</value></property><property><name>fs.permissions.umask-mode</name><value>000</value></property>"
},
{
"desc": "<p>The configured triggers for this service. This is a JSON formatted list of triggers. These triggers are evaluated as part of the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><span class='code'>triggerName</span> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific service. </li><li><span class='code'>triggerExpression</span> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. <li><span class='code'>streamThreshold</span> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. <li><span class='code'>enabled</span> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</p><p>For example, here is a JSON formatted trigger that fires if there are more than 10 DataNodes with more than 500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleType = DataNode and last(fd_open) > 500) DO health:red\",\n \"streamThreshold\": 10, \"enabled\": \"true\"}]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Service Triggers",
"name": "service_triggers",
"value": "[]"
},
{
"desc": "Comma-separated list of Kerberos realms that Hadoop services should trust. If empty, defaults to the configured default_realm in the krb5.conf file. After changing this value and restarting the service, any services depending on this one must be restarted as well. The hadoop.security.auth_to_local property is configured using this information.",
"display_name": "Trusted Kerberos Realms",
"name": "trusted_realms",
"value": ""
},
{
"desc": "For advanced use only, a list of derived configuration properties that will be used by the Service Monitor instead of the default ones.",
"display_name": "Service Monitor Derived Configs Advanced Configuration Snippet (Safety Valve)",
"name": "smon_derived_configs_safety_valve",
"value": null
},
{
"desc": "Enables the health check that verifies that the failover controllers associated with this service are healthy and running.",
"display_name": "Failover Controllers Healthy",
"name": "failover_controllers_healthy_enabled",
"value": "true"
},
{
"desc": "The attribute of the group object that identifies the users that are members of the group. The default will usually be appropriate for any LDAP installation.",
"display_name": "Hadoop User Group Mapping LDAP Group Membership Attribute",
"name": "hadoop_group_mapping_ldap_member_attr",
"value": "member"
},
{
"desc": "Comma separated list of users allowed to do short circuit read. A short circuit read allows a client co-located with the data to read HDFS file blocks directly from HDFS. If empty, will default to the DataNode process' user.",
"display_name": "DataNode Local Path Access Users",
"name": "dfs_block_local_path_access_user",
"value": null
},
{
"desc": "The timeout, in milliseconds, to use with the Cloudera Manager agent-based fencer.",
"display_name": "Timeout for Cloudera Manager Fencing Strategy",
"name": "dfs_ha_fencing_cloudera_manager_timeout_millis",
"value": "10000"
},
{
"desc": "Enable collection of audit events from the service's roles.",
"display_name": "Enable Collection",
"name": "navigator_audit_enabled",
"value": "true"
},
{
"desc": "Maximum bandwidth used for image transfer in bytes per second. This can help keep normal namenode operations responsive during checkpointing. A default value of 0 indicates that throttling is disabled.",
"display_name": "FsImage Transfer Bandwidth",
"name": "dfs_image_transfer_bandwidthPerSec",
"value": "0"
},
{
"desc": "The user the management services will impersonate as when connecting to HDFS. Defaults to 'hdfs', a superuser.",
"display_name": "HDFS User to Impersonate",
"name": "hdfs_user_to_impersonate",
"value": "hdfs"
},
{
"desc": "File path to the SSL keystore containing the SSL certificate required by the LDAP server.",
"display_name": "Hadoop User Group Mapping LDAP SSL Keystore",
"name": "hadoop_group_mapping_ldap_keystore",
"value": ""
},
{
"desc": "The short name of Hue's Kerberos principal",
"display_name": "Hue's Kerberos Principal Short Name",
"name": "hue_kerberos_principal_shortname",
"value": "hue"
},
{
"desc": "The minimal block replication.",
"display_name": "Minimal Block Replication",
"name": "dfs_replication_min",
"value": "1"
},
{
"desc": "The maximal block replication.",
"display_name": "Maximal Block Replication",
"name": "dfs_replication_max",
"value": "512"
},
{
"desc": "The service monitor will use these permissions to create the directory and files to test if the hdfs service is healthy. Permissions are specified using the 10-character unix-symbolic format e.g. '-rwxr-xr-x'.",
"display_name": "HDFS Health Canary Directory Permissions",
"name": "firehose_hdfs_canary_directory_permissions",
"value": "-rwxrwxrwx"
},
{
"desc": "Enable authorization",
"display_name": "Hadoop Secure Authorization",
"name": "hadoop_security_authorization",
"value": "false"
},
{
"desc": "The attribute of the group object that identifies the group name. The default will usually be appropriate for all LDAP systems.",
"display_name": "Hadoop User Group Mapping LDAP Group Name Attribute",
"name": "hadoop_group_mapping_ldap_group_name_attr",
"value": "cn"
},
{
"desc": "Enables DataNode support for the experimental DistributedFileSystem.getFileVBlockStorageLocations API. Applicable to CDH 4.1 and onwards.",
"display_name": "Enable HDFS Block Metadata API",
"name": "dfs_datanode_hdfs_blocks_metadata_enabled",
"value": "true"
},
{
"desc": "The tolerance window that will be used in HDFS service tests that depend on detection of the active NameNode.",
"display_name": "Active NameNode Detection Window",
"name": "hdfs_active_namenode_detecton_window",
"value": "3"
},
{
"desc": "Default block replication. The number of replications to make when the file is created. The default value is used if a replication number is not specified.",
"display_name": "Replication Factor",
"name": "dfs_replication",
"value": "3"
},
{
"desc": "Comma-delimited list of groups that you want to allow the HTTP user to impersonate. The default '*' allows all groups. To disable entirely, use a string that doesn't correspond to a group name, such as '_no_group_'. This is used by WebHCat.",
"display_name": "HTTP Proxy User Groups",
"name": "HTTP_proxy_user_groups_list",
"value": "*"
},
{
"desc": "The name of the system group shared by all the core Hadoop services.",
"display_name": "Shared Hadoop Group Name",
"name": "hdfs_hadoop_group_name",
"value": "hadoop"
},
{
"desc": "The amount of time to wait for HDFS filesystem image transfer from NameNode to complete.",
"display_name": "FsImage Transfer Timeout",
"name": "dfs_image_transfer_timeout",
"value": "60000"
},
{
"desc": "Comma-delimited list of hosts where you want to allow the Hive user to impersonate other users. The default '*' allows all hosts. To disable entirely, use a string that doesn't correspond to a host name, such as '_no_host'.",
"display_name": "Hive Proxy User Hosts",
"name": "hive_proxy_user_hosts_list",
"value": "*"
},
{
"desc": "An additional filter to use when searching for LDAP users. The default will usually be appropriate for Active Directory installations. If connecting to a generic LDAP server, ''sAMAccountName'' will likely be replaced with ''uid''. {0} is a special string used to denote where the username fits into the filter.",
"display_name": "Hadoop User Group Mapping LDAP User Search Filter",
"name": "hadoop_group_mapping_ldap_user_filter",
"value": "(&(objectClass=user)(sAMAccountName={0}))"
},
{
"desc": "List of fencing methods to use for service fencing. <tt>shell(./cloudera_manager_agent_fencer.py)</tt> is a fencing mechanism designed to take advantage of the CM agent. The <tt>sshfence</tt> method uses SSH. If using custom fencers (that may talk to shared store, power units, or network switches), use the shell mechanism to invoke them.",
"display_name": "HDFS High Availability Fencing Methods",
"name": "dfs_ha_fencing_methods",
"value": "shell(./cloudera_manager_agent_fencer.py)"
},
{
"desc": "For advanced use only, key-value pairs (one on each line) to be inserted into the environment of HDFS replication jobs.",
"display_name": "HDFS Replication Advanced Configuration Snippet (Safety Valve)",
"name": "hdfs_replication_env_safety_valve",
"value": null
},
{
"desc": "Enables the health check that a client can create, read, write, and delete files",
"display_name": "HDFS Canary Health Check",
"name": "hdfs_canary_health_enabled",
"value": "true"
},
{
"desc": "Path on the DataNode's local file system to a UNIX domain socket used for communication between the DataNode and local HDFS clients. This socket is used for Short Circuit Reads. Only the HDFS System User and \"root\" should have write access to the parent directory and all of its ancestors. This property is supported in CDH 4.2 or later deployments.",
"display_name": "UNIX Domain Socket path",
"name": "dfs_domain_socket_path",
"value": "/var/run/hdfs-sockets/dn"
},
{
"desc": "Algorithm to encrypt data transfer between DataNodes and clients, and among DataNodes. 3des is more cryptographically secure, but rc4 is substantially faster.",
"display_name": "Data Transfer Encryption Algorithm",
"name": "dfs_encrypt_data_transfer_algorithm",
"value": "rc4"
},
{
"desc": "The health check thresholds of the number of under-replicated blocks. Specified as a percentage of the total number of blocks.",
"display_name": "Under-replicated Block Monitoring Thresholds",
"name": "hdfs_under_replicated_blocks_thresholds",
"value": "{\"critical\":\"40.0\",\"warning\":\"10.0\"}"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>hdfs-site.xml</strong>. Applies to configurations of all roles in this service except client configuration.",
"display_name": "HDFS Service Advanced Configuration Snippet (Safety Valve) for hdfs-site.xml",
"name": "hdfs_service_config_safety_valve",
"value": null
},
{
"desc": "Comma-delimited list of hosts where you want to allow the HTTP user to impersonate other users. The default '*' allows all hosts. To disable entirely, use a string that doesn't correspond to a host name, such as '_no_host'. This is used by WebHCat.",
"display_name": "HTTP Proxy User Hosts",
"name": "HTTP_proxy_user_hosts_list",
"value": "*"
},
{
"desc": "<p>\nConfigures the rules for event tracking and coalescing. This feature is\nused to define equivalency between different audit events. When\nevents match, according to a set of configurable parameters, only one\nentry in the audit list is generated for all the matching events.\n</p>\n\n<p>\nTracking works by keeping a reference to events when they first appear,\nand comparing other incoming events against the \"tracked\" events according\nto the rules defined here.\n</p>\n\n<p>Event trackers are defined in a JSON object like the following:</p>\n\n<pre>\n{\n \"timeToLive\" : [integer],\n \"fields\" : [\n {\n \"type\" : [string],\n \"name\" : [string]\n }\n ]\n}\n</pre>\n\n<p>\nWhere:\n</p>\n\n<ul>\n <li>timeToLive: maximum amount of time an event will be tracked, in\n milliseconds. Must be provided. This defines how long, since it's\n first seen, an event will be tracked. A value of 0 disables tracking.</li>\n\n <li>fields: list of fields to compare when matching events against\n tracked events.</li>\n</ul>\n\n<p>\nEach field has an evaluator type associated with it. The evaluator defines\nhow the field data is to be compared. The following evaluators are\navailable:\n</p>\n\n<ul>\n <li>value: uses the field value for comparison.</li>\n\n <li>username: treats the field value as a user name, and ignores any\n host-specific data. This is useful for environment using Kerberos,\n so that only the principal name and realm are compared.</li>\n</ul>\n\n<p>\nThe following is the list of fields that can be used to compare HDFS events:\n</p>\n\n<ul>\n <li>username: the user performing the action.</li>\n <li>ipAddress: the IP from where the request originated.</li>\n <li>command: the HDFS operation being performed.</li>\n <li>src: the source path for the operation.</li>\n <li>dest: the destination path for the operation.</li>\n <li>permissions: the permissions associated with the operation.</li>\n</ul>\n",
"display_name": "Event Tracker",
"name": "navigator_event_tracker",
"value": "{\n \"comment\" : [\n \"Default event tracker for HDFS services.\",\n \"Defines equality by comparing username, operation and source path \",\n \"of the events.\"\n ],\n \"timeToLive\" : 60000,\n \"fields\" : [\n { \"type\": \"value\", \"name\" : \"src\" },\n { \"type\": \"value\", \"name\" : \"operation\" },\n { \"type\": \"username\", \"name\" : \"username\" }\n ]\n}\n"
},
{
"desc": "Choose the authentication mechanism used by Hadoop",
"display_name": "Hadoop Secure Authentication",
"name": "hadoop_security_authentication",
"value": "simple"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>hadoop-policy.xml</strong>. Applies to configurations of all roles in this service except client configuration.",
"display_name": "HDFS Service Advanced Configuration Snippet (Safety Valve) for hadoop-policy.xml",
"name": "hadoop_policy_config_safety_valve",
"value": null
},
{
"desc": "Enable encryption of data transfer between DataNodes and clients, and among DataNodes. For effective data transfer protection, enable Kerberos authentication and choose Privacy Quality of RPC Protection.",
"display_name": "Enable Data Transfer Encryption",
"name": "dfs_encrypt_data_transfer",
"value": "false"
},
{
"desc": "When computing the overall HDFS cluster health, consider the active NameNode's health",
"display_name": "Active NameNode Role Health Check",
"name": "hdfs_namenode_health_enabled",
"value": "true"
},
{
"desc": "The home directory of the system user on the local filesystem. This setting must reflect the system's configured value - only changing it here will not change the actual home directory.",
"display_name": "System User's Home Directory",
"name": "hdfs_user_home_dir",
"value": "/var/lib/hadoop-hdfs"
},
{
"desc": "When computing the overall HDFS cluster health, consider the health of the standby NameNode.",
"display_name": "Standby NameNode Health Check",
"name": "hdfs_standby_namenodes_health_enabled",
"value": "true"
},
{
"desc": "Comma-delimited list of hosts where you want to allow the flume user to impersonate other users. The default '*' allows all hosts. To disable entirely, use a string that doesn't correspond to a host name, such as '_no_host'.",
"display_name": "Flume Proxy User Hosts",
"name": "flume_proxy_user_hosts_list",
"value": "*"
},
{
"desc": "The URL for the LDAP server to use for resolving user groups when using LdapGroupsMapping.",
"display_name": "Hadoop User Group Mapping LDAP URL",
"name": "hadoop_group_mapping_ldap_url",
"value": ""
},
{
"desc": "SSH connection timeout, in milliseconds, to use with the built-in sshfence fencer.",
"display_name": "Timeout for SSH Fencing Strategy",
"name": "dfs_ha_fencing_ssh_connect_timeout",
"value": "30000"
},
{
"desc": "Maximum number of rolled over audit logs to retain. The logs will not be deleted if they contain audit events that have not yet been propagated to Audit Server.",
"display_name": "Number of Audit Logs to Retain",
"name": "navigator_audit_log_max_backup_index",
"value": "10"
},
{
"desc": "Comma-delimited list of hosts where you want to allow the HttpFS user to impersonate other users. The default '*' allows all hosts. To disable entirely, use a string that doesn't correspond to a host name, such as '_no_host'.",
"display_name": "HttpFS Proxy User Hosts",
"name": "httpfs_proxy_user_hosts_list",
"value": "*"
},
{
"desc": "Name of the ZooKeeper service that this HDFS service instance depends on",
"display_name": "ZooKeeper Service",
"name": "zookeeper_service",
"value": null
},
{
"desc": "The group that this service's processes should run as (except the HttpFS server, which has its own group)",
"display_name": "System Group",
"name": "process_groupname",
"value": "hdfs"
},
{
"desc": "The frequency with which the log4j event publication appender will retry sending undelivered log events to the Event server, in seconds",
"display_name": "Log Event Retry Frequency",
"name": "log_event_retry_frequency",
"value": "30"
},
{
"desc": "Default umask for file and directory creation, specified in an octal value (with a leading 0)",
"display_name": "Default Umask",
"name": "dfs_umaskmode",
"value": "022"
},
{
"desc": "The health check thresholds of free space in HDFS. Specified as a percentage of total HDFS capacity.",
"display_name": "HDFS Free Space Monitoring Thresholds",
"name": "hdfs_free_space_thresholds",
"value": "{\"critical\":\"10.0\",\"warning\":\"20.0\"}"
},
{
"desc": "The health check thresholds of the number of blocks that have at least one corrupt replica. Specified as a percentage of the total number of blocks.",
"display_name": "Blocks With Corrupt Replicas Monitoring Thresholds",
"name": "hdfs_blocks_with_corrupt_replicas_thresholds",
"value": "{\"critical\":\"1.0\",\"warning\":\"0.5\"}"
},
{
"desc": "Comma-separated list of groups authorized to perform admin operations on Hadoop. This is emitted only if authorization is enabled.",
"display_name": "Authorized Admin Groups",
"name": "hadoop_authorized_admin_groups",
"value": ""
},
{
"desc": "An additional filter to use when searching for groups.",
"display_name": "Hadoop User Group Mapping LDAP Group Search Filter",
"name": "hadoop_group_mapping_ldap_group_filter",
"value": "(objectClass=group)"
},
{
"desc": "For advanced use only, a string to be inserted into the client configuration for <strong>navigator.client.properties</strong>.",
"display_name": "HDFS Client Advanced Configuration Snippet (Safety Valve) for navigator.client.properties",
"name": "navigator_client_config_safety_valve",
"value": null
},
{
"desc": "The health test thresholds of the overall DataNode health. The check returns \"Concerning\" health if the percentage of \"Healthy\" DataNodes falls below the warning threshold. The check is unhealthy if the total percentage of \"Healthy\" and \"Concerning\" DataNodes falls below the critical threshold.",
"display_name": "Healthy DataNode Monitoring Thresholds",
"name": "hdfs_datanodes_healthy_thresholds",
"value": "{\"critical\":\"90.0\",\"warning\":\"95.0\"}"
},
{
"desc": "The SSH private key files to use with the built-in sshfence fencer. These are to be accessible to the <tt>hdfs</tt> user on the machines running the NameNodes.",
"display_name": "Private Keys for SSH Fencing Strategy",
"name": "dfs_ha_fencing_ssh_private_key_files",
"value": null
},
{
"desc": "Whether or not to use SSL when connecting to the LDAP server.",
"display_name": "Hadoop User Group Mapping LDAP SSL Enabled",
"name": "hadoop_group_mapping_ldap_use_ssl",
"value": "false"
}
]

View File

@ -0,0 +1,314 @@
[
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Log Directory Free Space Monitoring Percentage Thresholds",
"name": "log_directory_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "Enable SMTP authentication for Oozie email action",
"display_name": "Oozie Email Action SMTP Authentication Enabled",
"name": "oozie_email_smtp_auth",
"value": "false"
},
{
"desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup I/O Weight",
"name": "rm_io_weight",
"value": "500"
},
{
"desc": "When computing the overall Oozie Server health, consider the host's health.",
"display_name": "Oozie Server Host Health Test",
"name": "oozie_server_host_health_enabled",
"value": "true"
},
{
"desc": "Password for connecting to the database used by Oozie Server. Does not apply if you are using Derby as the database type.",
"display_name": "Oozie Server Database Password",
"name": "oozie_database_password",
"value": ""
},
{
"desc": "Comma-delimited list of groups that you want to allow the Hue user to impersonate. The default '*' allows all groups. To disable entirely, use a string that doesn't correspond to a group name, such as '_no_group_'.",
"display_name": "Hue Proxy User Groups",
"name": "hue_proxy_user_groups_list",
"value": "*"
},
{
"desc": "SMTP username for Oozie email action",
"display_name": "Oozie Email Action SMTP Authentication Username",
"name": "oozie_email_smtp_username",
"value": null
},
{
"desc": "Enables the health test that the Oozie Server's process state is consistent with the role configuration",
"display_name": "Oozie Server Process Health Test",
"name": "oozie_server_scm_health_enabled",
"value": "true"
},
{
"desc": "Number of threads used for executing callables",
"display_name": "Number Threads For Executing Callables",
"name": "oozie_service_callablequeueservice_threads",
"value": "10"
},
{
"desc": "SMTP password for Oozie email action",
"display_name": "Oozie Email Action SMTP Authentication Password",
"name": "oozie_email_smtp_password",
"value": null
},
{
"desc": "Maximum concurrency for a given callable type. Each command is a callable type: submit, start, run, etc. Each action type is a callable type: MapReduce, SSH, sub-workflow, etc. All commands that use action executors (action-start, action-end, etc.) use the action type as the callable type.",
"display_name": "Maximum concurrency for a given callable type",
"name": "oozie_service_callablequeueservice_callable_concurrency",
"value": "3"
},
{
"desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.",
"display_name": "Automatically Restart Process",
"name": "process_auto_restart",
"value": "false"
},
{
"desc": "<p>The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part of the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><span class='code'>triggerName</span> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific role. </li><li><span class='code'>triggerExpression</span> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. <li><span class='code'>streamThreshold</span> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. <li><span class='code'>enabled</span> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</p><p>For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:red\",\n \"streamThreshold\": 0}, \"enabled\": \"true\"]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Role Triggers",
"name": "role_triggers",
"value": "[]"
},
{
"desc": "Comma-delimited list of hosts where you want to allow the Hue user to impersonate other users. The default '*' allows all hosts. To disable entirely, use a string that doesn't correspond to a host name, such as '_no_host'.",
"display_name": "Hue Proxy User Hosts",
"name": "hue_proxy_user_hosts_list",
"value": "*"
},
{
"desc": "The admin port on which the Oozie Server runs.",
"display_name": "Oozie Admin Port",
"name": "oozie_admin_port",
"value": "11001"
},
{
"desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Soft Limit",
"name": "rm_memory_soft_limit",
"value": "-1"
},
{
"desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup CPU Shares",
"name": "rm_cpu_shares",
"value": "1024"
},
{
"desc": "If true, enables the Oozie Server web console. ExtJS 2.2 zip archive must be extracted to /var/lib/oozie on the same host as the Oozie Server.",
"display_name": "Enable Oozie Server Web Console",
"name": "oozie_web_console",
"value": "false"
},
{
"desc": "Username for connecting to the database used by Oozie Server. Does not apply if you are using Derby as the database type.",
"display_name": "Oozie Server Database User",
"name": "oozie_database_user",
"value": "sa"
},
{
"desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Hard Limit",
"name": "rm_memory_hard_limit",
"value": "-1"
},
{
"desc": "Comma-separated list of Oozie plug-ins to be activated. If one plugin cannot be loaded, all the plugins are ignored.",
"display_name": "Oozie Server Plugins",
"name": "oozie_plugins_list",
"value": ""
},
{
"desc": "Directory where the Oozie Server will place its data. Only applicable when using Derby as the database type.",
"display_name": "Oozie Server Data Directory",
"name": "oozie_data_dir",
"value": "/var/lib/oozie/data"
},
{
"desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.",
"display_name": "Heap Dump Directory",
"name": "oom_heap_dump_dir",
"value": "/tmp"
},
{
"desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.",
"display_name": "File Descriptor Monitoring Thresholds",
"name": "oozie_server_fd_thresholds",
"value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>log4j.properties</strong> for this role only.",
"display_name": "Oozie Server Logging Advanced Configuration Snippet (Safety Valve)",
"name": "log4j_safety_valve",
"value": null
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.",
"display_name": "Log Directory Free Space Monitoring Absolute Thresholds",
"name": "log_directory_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "Location of the keystore file on the local file system.",
"display_name": "Oozie SSL Keystore File",
"name": "oozie_https_keystore_file",
"value": "/var/lib/oozie/.keystore"
},
{
"desc": "The from address to be used for mailing all emails for Oozie email action",
"display_name": "Oozie Email Action From Address",
"name": "oozie_email_from_address",
"value": "oozie@localhost"
},
{
"desc": "Comma-separated list of SchemaService workflow extension schemas for additional action types.",
"display_name": "Oozie SchemaService Workflow Extension Schemas",
"name": "oozie_workflow_extension_schemas",
"value": "hive-action-0.2.xsd,sqoop-action-0.2.xsd,email-action-0.1.xsd,distcp-action-0.1.xsd,shell-action-0.1.xsd,ssh-action-0.1.xsd,distcp-action-0.2.xsd,hive-action-0.3.xsd,hive-action-0.4.xsd,hive-action-0.5.xsd,oozie-sla-0.1.xsd,oozie-sla-0.2.xsd,sqoop-action-0.3.xsd,sqoop-action-0.4.xsd,shell-action-0.2.xsd,shell-action-0.3.xsd,ssh-action-0.2.xsd"
},
{
"desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.",
"display_name": "Maximum Process File Descriptors",
"name": "rlimit_fds",
"value": null
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Health Alerts for this Role",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "The SMTP server port to use for Oozie email action",
"display_name": "Oozie Email Action SMTP Port",
"name": "oozie_email_smtp_prt",
"value": "25"
},
{
"desc": "Port of the Oozie Server while using SSL.",
"display_name": "Oozie HTTPS Port",
"name": "oozie_https_port",
"value": "11443"
},
{
"desc": "The maximum number of rolled log files to keep for Oozie Server logs. Typically used by log4j.",
"display_name": "Oozie Server Maximum Log File Backups",
"name": "max_log_backup_index",
"value": "720"
},
{
"desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.",
"display_name": "Kill When Out of Memory",
"name": "oom_sigkill_enabled",
"value": "true"
},
{
"desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.",
"display_name": "Dump Heap When Out of Memory",
"name": "oom_heap_dump_enabled",
"value": "false"
},
{
"desc": "The period to review when computing unexpected exits.",
"display_name": "Unexpected Exits Monitoring Period",
"name": "unexpected_exits_window",
"value": "5"
},
{
"desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.",
"display_name": "Unexpected Exits Thresholds",
"name": "unexpected_exits_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "Name of the database used by Oozie Server.",
"display_name": "Oozie Server Database Name",
"name": "oozie_database_name",
"value": "oozie"
},
{
"desc": "Directory where Oozie Server will place its log files.",
"display_name": "Oozie Server Log Directory",
"name": "oozie_log_dir",
"value": "/var/log/oozie"
},
{
"desc": "Maximum size for the Java Process heap memory. Passed to Java -Xmx. Measured in bytes.",
"display_name": "Java Heap Size of Oozie Server in Bytes",
"name": "oozie_java_heapsize",
"value": "1073741824"
},
{
"desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Java Configuration Options for Oozie Server",
"name": "oozie_java_opts",
"value": ""
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "Comma-separated list of ActionService executor extension classes. Only action types with associated executors can be used in workflows.",
"display_name": "Oozie ActionService Executor Extension Classes",
"name": "oozie_executor_extension_classes",
"value": "org.apache.oozie.action.hadoop.HiveActionExecutor,org.apache.oozie.action.hadoop.SqoopActionExecutor,org.apache.oozie.action.email.EmailActionExecutor,org.apache.oozie.action.hadoop.ShellActionExecutor,org.apache.oozie.action.hadoop.DistcpActionExecutor"
},
{
"desc": "Type of the database used by Oozie Server.",
"display_name": "Oozie Server Database Type",
"name": "oozie_database_type",
"value": "derby"
},
{
"desc": "Maximum callable queue size",
"display_name": "Maximum Callable Queue Size",
"name": "oozie_service_callablequeueservice_queue_size",
"value": "10000"
},
{
"desc": "Port of Oozie Server",
"display_name": "Oozie HTTP Port",
"name": "oozie_http_port",
"value": "11000"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>oozie-site.xml</strong> for this role only.",
"display_name": "Oozie Server Advanced Configuration Snippet (Safety Valve) for oozie-site.xml",
"name": "oozie_config_safety_valve",
"value": null
},
{
"desc": "Password for the keystore.",
"display_name": "Oozie SSL Keystore Password",
"name": "oozie_https_keystore_password",
"value": null
},
{
"desc": "The SMTP server host to use for Oozie email action",
"display_name": "Oozie Email Action SMTP Host",
"name": "oozie_email_smtp_host",
"value": "localhost"
},
{
"desc": "The minimum log level for Oozie Server logs",
"display_name": "Oozie Server Logging Threshold",
"name": "log_threshold",
"value": "INFO"
},
{
"desc": "Hostname of the database used by Oozie Server. If the port is non-default for your database type, use host:port notation. Does not apply if you are using Derby as the database type.",
"display_name": "Oozie Server Database Host",
"name": "oozie_database_host",
"value": "localhost"
}
]

View File

@ -0,0 +1,98 @@
[
{
"desc": "Namespace used by this Oozie service in ZooKeeper when High Availability is enabled.",
"display_name": "ZooKeeper Namespace",
"name": "oozie_zookeeper_namespace",
"value": "oozie"
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this service reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Service Level Health Alerts",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "Enable SSL for Oozie. <b>Note:</b>This is supported only from CDH 4.3 onwards.",
"display_name": "Use SSL",
"name": "oozie_use_ssl",
"value": "false"
},
{
"desc": "The group that this service's processes should run as.",
"display_name": "System Group",
"name": "process_groupname",
"value": "oozie"
},
{
"desc": "Use ACLs on Znode while a secure ZooKeeper is used for Oozie High Availability. <b>Note:</b> This config is not emitted if ZooKeeper is not secure.",
"display_name": "Use ACLs on Znode",
"name": "oozie_zk_secure",
"value": "true"
},
{
"desc": "For advanced use only, a list of derived configuration properties that will be used by the Service Monitor instead of the default ones.",
"display_name": "Service Monitor Derived Configs Advanced Configuration Snippet (Safety Valve)",
"name": "smon_derived_configs_safety_valve",
"value": null
},
{
"desc": "For advanced use only, key-value pairs (one on each line) to be inserted into a role's environment. Applies to configurations of all roles in this service except client configuration.",
"display_name": "Oozie Service Environment Advanced Configuration Snippet (Safety Valve)",
"name": "oozie_env_safety_valve",
"value": null
},
{
"desc": "Name of the ZooKeeper service that this Oozie service instance depends on",
"display_name": "ZooKeeper Service",
"name": "zookeeper_service",
"value": null
},
{
"desc": "A list of credential class mappings for CredentialsProvider.",
"display_name": "Oozie Credential Classes",
"name": "oozie_credential_classes",
"value": "hcat=org.apache.oozie.action.hadoop.HCatCredentials,hbase=org.apache.oozie.action.hadoop.HbaseCredentials,hive2=org.apache.oozie.action.hadoop.Hive2Credentials"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "Address of the load balancer used if Oozie HA is enabled. Should be specified in host:port format.",
"display_name": "Oozie Load Balancer",
"name": "oozie_load_balancer",
"value": null
},
{
"desc": "Coordinator Job Lookup trigger command is scheduled at this interval (in seconds).",
"display_name": "Coordinator Job Lookup Interval",
"name": "oozie_service_coord_lookup_interval",
"value": "300"
},
{
"desc": "The user that this service's processes should run as.",
"display_name": "System User",
"name": "process_username",
"value": "oozie"
},
{
"desc": "<p>The configured triggers for this service. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><span class='code'>triggerName</span> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific service. </li><li><span class='code'>triggerExpression</span> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. <li><span class='code'>streamThreshold</span> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. <li><span class='code'>enabled</span> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</p><p>For example, here is a JSON formatted trigger that fires if there are more than 10 DataNodes with more than 500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleType = DataNode and last(fd_open) > 500) DO health:red\",\n \"streamThreshold\": 10, \"enabled\": \"true\"}]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Service Triggers",
"name": "service_triggers",
"value": "[]"
},
{
"desc": "The health test thresholds of the overall Oozie Server health. The check returns \"Concerning\" health if the percentage of \"Healthy\" Oozie Servers falls below the warning threshold. The check is unhealthy if the total percentage of \"Healthy\" and \"Concerning\" Oozie Servers falls below the critical threshold.",
"display_name": "Healthy Oozie Server Monitoring Thresholds",
"name": "oozie_servers_healthy_thresholds",
"value": "{\"critical\":\"51.0\",\"warning\":\"99.0\"}"
},
{
"desc": "Service to run MapReduce jobs against",
"display_name": "MapReduce Service",
"name": "mapreduce_yarn_service",
"value": null
}
]

View File

@ -0,0 +1,34 @@
[
"dfs_block_size",
"dfs_umaskmode",
"dfs_webhdfs_enabled",
"dfs_permissions",
"io_compression_codecs",
"dfs_datanode_du_reserved",
"dfs_datanode_failed_volumes_tolerated",
"dfs_name_dir_restore",
"fs_trash_interval",
"dfs_safemode_min_datanodes",
"dfs_safemode_extension",
"dfs_access_time_precision",
"yarn_acl_enable",
"yarn_admin_acl",
"yarn_log_aggregation_enable",
"yarn_log_aggregation_retain_seconds",
"mapreduce_jobhistory_max_age_ms",
"mapreduce_jobhistory_cleaner_interval",
"yarn_nodemanager_container_manager_thread_count",
"yarn_nodemanager_delete_thread_count",
"yarn_nodemanager_heartbeat_interval_ms",
"yarn_nodemanager_localizer_cache_cleanup_interval_ms",
"yarn_nodemanager_localizer_client_thread_count",
"yarn_nodemanager_localizer_cache_target_size_mb",
"yarn_nodemanager_localizer_fetch_thread_count",
"yarn_nodemanager_log_retain_seconds",
"yarn_resourcemanager_client_thread_count",
"yarn_resourcemanager_scheduler_client_thread_count",
"yarn_resourcemanager_admin_client_thread_count",
"yarn_resourcemanager_amliveliness_monitor_interval_ms",
"yarn_am_liveness_monitor_expiry_interval_ms",
"yarn_resourcemanager_am_max_retries"
]

View File

@ -0,0 +1,308 @@
[
{
"desc": "For MapReduce job outputs that are compressed, specify the compression codec to use. Will be part of generated client configuration.",
"display_name": "Compression Codec of MapReduce Job Output",
"name": "mapred_output_compression_codec",
"value": "org.apache.hadoop.io.compress.DefaultCodec"
},
{
"desc": "The default number of parallel transfers run by reduce during the copy (shuffle) phase. This number should be between sqrt(nodes*number_of_map_slots_per_node) and nodes*number_of_map_slots_per_node/2. Will be part of generated client configuration.",
"display_name": "Default Number of Parallel Transfers During Shuffle",
"name": "mapred_reduce_parallel_copies",
"value": "10"
},
{
"desc": "The number of streams to merge at the same time while sorting files. That is, the number of sort heads to use during the merge sort on the reducer side. This determines the number of open file handles. Merging more files in parallel reduces merge sort iterations and improves run time by eliminating disk I/O. Note that merging more files in parallel uses more memory. If 'io.sort.factor' is set too high or the maximum JVM heap is set too low, excessive garbage collection will occur. The Hadoop default is 10, but Cloudera recommends a higher value. Will be part of generated client configuration.",
"display_name": "I/O Sort Factor",
"name": "io_sort_factor",
"value": "64"
},
{
"desc": "The priority level that the client configuration will have in the Alternatives system on the hosts. Higher priority levels will cause Alternatives to prefer this configuration over any others.",
"display_name": "Alternatives Priority",
"name": "client_config_priority",
"value": "92"
},
{
"desc": "Limit on the number of counters allowed per job.",
"display_name": "Job Counters Limit",
"name": "mapreduce_job_counters_limit",
"value": "120"
},
{
"desc": "The number of virtual CPU cores allocated for each map task of a job. This parameter has no effect prior to CDH 4.4.",
"display_name": "Map Task CPU Virtual Cores",
"name": "mapreduce_map_cpu_vcores",
"value": "1"
},
{
"desc": "If enabled, multiple instances of some reduce tasks may be executed in parallel.",
"display_name": "Reduce Tasks Speculative Execution",
"name": "mapred_reduce_tasks_speculative_execution",
"value": "false"
},
{
"desc": "Threshold for number of reduces, beyond which a job is considered too big for the ubertask optimization. <strong>Note: as of CDH5.0.0, MR2 cannot support more than one reduce in an ubertask.</strong> (Zero is a valid max.)",
"display_name": "Ubertask Maximum Reduces",
"name": "mapreduce_job_ubertask_maxreduces",
"value": "1"
},
{
"desc": "The application framework to run jobs with. If not set, jobs will be run with the local job runner.",
"display_name": "Application Framework",
"name": "mapreduce_framework_name",
"value": "yarn"
},
{
"desc": "Base sleep time between failover attempts. Used only if RM HA is enabled.",
"display_name": "Client Failover Sleep Base Time",
"name": "client_failover_sleep_base",
"value": "100"
},
{
"desc": "For advanced use only, a string to be inserted into the client configuration for <strong>yarn-site.xml</strong>.",
"display_name": "YARN Client Advanced Configuration Snippet (Safety Valve) for yarn-site.xml",
"name": "yarn_client_config_safety_valve",
"value": null
},
{
"desc": "Compression level for the codec used to compress MapReduce outputs. Default compression is a balance between speed and compression ratio.",
"display_name": "Compression Level of Codecs",
"name": "zlib_compress_level",
"value": "DEFAULT_COMPRESSION"
},
{
"desc": "Size of buffer for read and write operations of SequenceFiles.",
"display_name": "SequenceFile I/O Buffer Size",
"name": "io_file_buffer_size",
"value": "65536"
},
{
"desc": "For advanced use only, a string to be inserted into the client configuration for <strong>mapred-site.xml</strong>.",
"display_name": "MapReduce Client Advanced Configuration Snippet (Safety Valve) for mapred-site.xml",
"name": "mapreduce_client_config_safety_valve",
"value": null
},
{
"desc": "The maximum heap size, in bytes, of the Java MapReduce ApplicationMaster. This number will be formatted and concatenated with 'ApplicationMaster Java Opts Base' to pass to Hadoop.",
"display_name": "ApplicationMaster Java Maximum Heap Size",
"name": "yarn_app_mapreduce_am_max_heap",
"value": "825955249"
},
{
"desc": "The replication level for submitted job files.",
"display_name": "Mapreduce Submit Replication",
"name": "mapred_submit_replication",
"value": "10"
},
{
"desc": "The total amount of memory buffer, in megabytes, to use while sorting files. Note that this memory comes out of the user JVM heap size (meaning total user JVM heap - this amount of memory = total user usable heap space. Note that Cloudera's default differs from Hadoop's default; Cloudera uses a bigger buffer by default because modern machines often have more RAM. The smallest value across all TaskTrackers will be part of generated client configuration.",
"display_name": "I/O Sort Memory Buffer (MiB)",
"name": "io_sort_mb",
"value": "256"
},
{
"desc": "Whether to enable the small-jobs \"ubertask\" optimization, which runs \"sufficiently small\" jobs sequentially within a single JVM. \"Small\" is defined by the mapreduce.job.ubertask.maxmaps, mapreduce.job.ubertask.maxreduces, and mapreduce.job.ubertask.maxbytes settings.",
"display_name": "Enable Ubertask Optimization",
"name": "mapreduce_job_ubertask_enabled",
"value": "false"
},
{
"desc": "Java command line arguments passed to the MapReduce ApplicationMaster.",
"display_name": "ApplicationMaster Java Opts Base",
"name": "yarn_app_mapreduce_am_command_opts",
"value": "-Djava.net.preferIPv4Stack=true"
},
{
"desc": "The amount of physical memory, in MiB, allocated for each reduce task of a job. This parameter has no effect prior to CDH 4.4.",
"display_name": "Reduce Task Memory",
"name": "mapreduce_reduce_memory_mb",
"value": "1024"
},
{
"desc": "The maximum permissible size of the split metainfo file. The JobTracker won't attempt to read split metainfo files bigger than the configured value. No limits if set to -1.",
"display_name": "JobTracker MetaInfo Maxsize",
"name": "mapreduce_jobtracker_split_metainfo_maxsize",
"value": "10000000"
},
{
"desc": "Fraction of the number of map tasks in the job which should be completed before reduce tasks are scheduled for the job.",
"display_name": "Number of Map Tasks to Complete Before Reduce Tasks",
"name": "mapred_reduce_slowstart_completed_maps",
"value": "0.8"
},
{
"desc": "These are Java command line arguments. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Client Java Configuration Options",
"name": "mapreduce_client_java_opts",
"value": "-Djava.net.preferIPv4Stack=true"
},
{
"desc": "The physical memory requirement, in MiB, for the ApplicationMaster.",
"display_name": "ApplicationMaster Memory",
"name": "yarn_app_mapreduce_am_resource_mb",
"value": "1024"
},
{
"desc": "The maximum Java heap size, in bytes, of the reduce processes. This number will be formatted and concatenated with 'Reduce Task Java Opts Base' to pass to Hadoop.",
"display_name": "Reduce Task Maximum Heap Size",
"name": "mapreduce_reduce_java_opts_max_heap",
"value": "825955249"
},
{
"desc": "Maximum size for the Java process heap memory. Passed to Java -Xmx. Measured in bytes.",
"display_name": "Client Java Heap Size in Bytes",
"name": "mapreduce_client_java_heapsize",
"value": "825955249"
},
{
"desc": "The number of milliseconds before a task will be terminated if it neither reads an input, writes an output, nor updates its status string.",
"display_name": "Mapreduce Task Timeout",
"name": "mapred_task_timeout",
"value": "600000"
},
{
"desc": "The virtual CPU cores requirement, for the ApplicationMaster. This parameter has no effect prior to CDH 4.4.",
"display_name": "ApplicationMaster Virtual CPU Cores",
"name": "yarn_app_mapreduce_am_resource_cpu_vcores",
"value": "1"
},
{
"desc": "The amount of physical memory, in MiB, allocated for each map task of a job.",
"display_name": "Map Task Memory",
"name": "mapreduce_map_memory_mb",
"value": "1024"
},
{
"desc": "The number of virtual CPU cores for each reduce task of a job.",
"display_name": "Reduce Task CPU Virtual Cores",
"name": "mapreduce_reduce_cpu_vcores",
"value": "1"
},
{
"desc": "For MapReduce map outputs that are compressed, specify the compression codec to use. Will be part of generated client configuration.",
"display_name": "Compression Codec of MapReduce Map Output",
"name": "mapred_map_output_compression_codec",
"value": "org.apache.hadoop.io.compress.SnappyCodec"
},
{
"desc": "Classpaths to include for MapReduce applications.",
"display_name": "MR Application Classpath",
"name": "mapreduce_application_classpath",
"value": "$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,$MR2_CLASSPATH"
},
{
"desc": "When set, each role will identify important log events and forward them to Cloudera Manager.",
"display_name": "Enable Log Event Capture",
"name": "catch_events",
"value": "true"
},
{
"desc": "Maximum sleep time between failover attempts. Used only if RM HA is enabled.",
"display_name": "Client Failover Sleep Max Time",
"name": "client_failover_sleep_max",
"value": "2000"
},
{
"desc": "Location to store the job history files of running jobs. This is a path on the host where the JobTracker is running.",
"display_name": "Running Job History Location",
"name": "hadoop_job_history_dir",
"value": "/var/log/hadoop-mapreduce/history"
},
{
"desc": "Compress the output of MapReduce jobs. Will be part of generated client configuration.",
"display_name": "Compress MapReduce Job Output",
"name": "mapred_output_compress",
"value": "false"
},
{
"desc": "For MapReduce job outputs that are compressed as SequenceFiles, you can select one of these compression type options: NONE, RECORD or BLOCK. Cloudera recommends BLOCK. Will be part of generated client configuration.",
"display_name": "Compression Type of MapReduce Job Output",
"name": "mapred_output_compression_type",
"value": "BLOCK"
},
{
"desc": "Java opts for the map processes. The following symbol, if present, will be interpolated: @taskid@ is replaced by current TaskID. Any other occurrences of '@' will go unchanged. For example, to enable verbose gc logging to a file named for the taskid in /tmp pass a value of: \"-verbose:gc -Xloggc:/tmp/@taskid@.gc\". The configuration variable 'Map Task Memory' can be used to control the maximum memory of the map processes.",
"display_name": "Map Task Java Opts Base",
"name": "mapreduce_map_java_opts",
"value": "-Djava.net.preferIPv4Stack=true"
},
{
"desc": "Additional execution environment entries for map and reduce task processes.",
"display_name": "MR Application Environment",
"name": "mapreduce_admin_user_env",
"value": "LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native:$JAVA_LIBRARY_PATH"
},
{
"desc": "The soft limit in either the buffer or record collection buffers. When this limit is reached, a thread will begin to spill the contents to disk in the background. Note that this does not imply any chunking of data to the spill. A value less than 0.5 is not recommended. The syntax is in decimal units; the default is 80% and is formatted 0.8. Will be part of generated client configuration.",
"display_name": "I/O Sort Spill Percent",
"name": "io_sort_spill_percent",
"value": "0.8"
},
{
"desc": "The default number of reduce tasks per job. Will be part of generated client configuration.",
"display_name": "Default Number of Reduce Tasks per Job",
"name": "mapred_reduce_tasks",
"value": "1"
},
{
"desc": "Maximum allowed connections for the shuffle. Set to 0 (zero) to indicate no limit on the number of connections.",
"display_name": "Max Shuffle Connections",
"name": "mapreduce_shuffle_max_connections",
"value": "80"
},
{
"desc": "For advanced use only, key-value pairs (one on each line) to be inserted into the client configuration for <strong>hadoop-env.sh</strong>.",
"display_name": "Gateway Client Environment Advanced Configuration Snippet for hadoop-env.sh (Safety Valve)",
"name": "mapreduce_client_env_safety_valve",
"value": null
},
{
"desc": "The maximum Java heap size, in bytes, of the map processes. This number will be formatted and concatenated with 'Map Task Java Opts Base' to pass to Hadoop.",
"display_name": "Map Task Maximum Heap Size",
"name": "mapreduce_map_java_opts_max_heap",
"value": "825955249"
},
{
"desc": "A shared directory for temporary files.",
"display_name": "Shared Temp Directories",
"name": "mapreduce_cluster_temp_dir",
"value": null
},
{
"desc": "Java opts for the reduce processes. The following symbol, if present, will be interpolated: @taskid@ is replaced by current TaskID. Any other occurrences of '@' will go unchanged. For example, to enable verbose gc logging to a file named for the taskid in /tmp pass a value of: \"-verbose:gc -Xloggc:/tmp/@taskid@.gc\". The configuration variable 'Reduce Task Memory' can be used to control the maximum memory of the reduce processes.",
"display_name": "Reduce Task Java Opts Base",
"name": "mapreduce_reduce_java_opts",
"value": "-Djava.net.preferIPv4Stack=true"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "If enabled, uses compression on the map outputs before they are sent across the network. Will be part of generated client configuration.",
"display_name": "Use Compression on Map Outputs",
"name": "mapred_compress_map_output",
"value": "true"
},
{
"desc": "Threshold for number of maps, beyond which a job is considered too big for the ubertask optimization.",
"display_name": "Ubertask Maximum Maps",
"name": "mapreduce_job_ubertask_maxmaps",
"value": "9"
},
{
"desc": "If enabled, multiple instances of some map tasks may be executed in parallel.",
"display_name": "Map Tasks Speculative Execution",
"name": "mapred_map_tasks_speculative_execution",
"value": "false"
},
{
"desc": "Threshold for number of input bytes, beyond which a job is considered too big for the ubertask optimization. If no value is specified, dfs.block.size is used as a default.",
"display_name": "Ubertask Maximum Job Size",
"name": "mapreduce_job_ubertask_maxbytes",
"value": null
}
]

View File

@ -0,0 +1,248 @@
[
{
"desc": "Maximum size for the Java Process heap memory. Passed to Java -Xmx. Measured in bytes.",
"display_name": "Java Heap Size of JobHistory Server in Bytes",
"name": "mr2_jobhistory_java_heapsize",
"value": "1073741824"
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Log Directory Free Space Monitoring Percentage Thresholds",
"name": "log_directory_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "<p>This file contains the rules which govern how log messages are turned into events by the custom log4j appender that this role loads. It is in JSON format, and is composed of a list of rules. Every log message is evaluated against each of these rules in turn to decide whether or not to send an event for that message.</p><p>Each rule has some or all of the following fields:</p><ul><li><span class='code'>alert</span> - whether or not events generated from this rule should be promoted to alerts. A value of \"true\" will cause alerts to be generated. If not specified, the default is \"false\".</li><li><span class='code'>rate</span> <strong>(mandatory)</strong> - the maximum number of log messages matching this rule that may be sent as events every minute. If more than <tt>rate</tt> matching log messages are received in a single minute, the extra messages are ignored. If rate is less than 0, the number of messages per minute is unlimited.</li><li><span class='code'>periodminutes</span> - the number of minutes during which the publisher will only publish <tt>rate</tt> events or fewer. If not specified, the default is <strong>one minute</strong></li><li><span class='code'>threshold</span> - apply this rule only to messages with this log4j severity level or above. An example is \"WARN\" for warning level messages or higher.</li><li><span class='code'>content</span> - match only those messages whose contents match this regular expression.</li><li><span class='code'>exceptiontype</span> - match only those messages which are part of an exception message. The exception type must match this regular expression.</li></ul><br/><p>Example:<span class='code'>{\"alert\": false, \"rate\": 10, \"exceptiontype\": \"java.lang.StringIndexOutOfBoundsException\"}</span></p><p>This rule will send events to Cloudera Manager for every <span class='code'>StringIndexOutOfBoundsException</span>, up to a maximum of 10 every minute.</p>",
"display_name": "Rules to Extract Events from Log Files",
"name": "log_event_whitelist",
"value": "{\n \"version\": \"0\",\n \"rules\": [\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"FATAL\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Instead, use .*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Use .* instead\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 2, \"exceptiontype\": \".*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\"}\n ]\n}\n"
},
{
"desc": "The period to review when computing the moving average of garbage collection time.",
"display_name": "Garbage Collection Duration Monitoring Period",
"name": "jobhistory_gc_duration_window",
"value": "5"
},
{
"desc": "The root HDFS directory of the staging area for users' MR2 jobs; for example /user. The staging directories are always named after the user.",
"display_name": "MapReduce ApplicationMaster Staging Root Directory",
"name": "yarn_app_mapreduce_am_staging_dir",
"value": "/user"
},
{
"desc": "Enables the health test that the JobHistory Server's process state is consistent with the role configuration",
"display_name": "JobHistory Server Process Health Test",
"name": "jobhistory_scm_health_enabled",
"value": "true"
},
{
"desc": "When computing the overall JobHistory Server health, consider the host's health.",
"display_name": "JobHistory Server Host Health Test",
"name": "jobhistory_host_health_enabled",
"value": "true"
},
{
"desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.",
"display_name": "Automatically Restart Process",
"name": "process_auto_restart",
"value": "false"
},
{
"desc": "The maximum number of rolled log files to keep for JobHistory Server logs. Typically used by log4j.",
"display_name": "JobHistory Server Maximum Log File Backups",
"name": "max_log_backup_index",
"value": "10"
},
{
"desc": "<p>The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><span class='code'>triggerName</span> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific role. </li><li><span class='code'>triggerExpression</span> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. <li><span class='code'>streamThreshold</span> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. <li><span class='code'>enabled</span> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</p><p>For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:red\",\n \"streamThreshold\": 0}, \"enabled\": \"true\"]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Role Triggers",
"name": "role_triggers",
"value": "[]"
},
{
"desc": "Directory where JobHistory Server will place its log files.",
"display_name": "JobHistory Server Log Directory",
"name": "mr2_jobhistory_log_dir",
"value": "/var/log/hadoop-mapreduce"
},
{
"desc": "Time interval for history cleaner to check for files to delete. Files are only deleted if they are older than mapreduce.jobhistory.max-age-ms.",
"display_name": "Job History Files Cleaner Interval",
"name": "mapreduce_jobhistory_cleaner_interval",
"value": "86400000"
},
{
"desc": "The health test thresholds on the duration of the metrics request to the web server.",
"display_name": "Web Metric Collection Duration",
"name": "jobhistory_web_metric_collection_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"10000.0\"}"
},
{
"desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Java Configuration Options for JobHistory Server",
"name": "mr2_jobhistory_java_opts",
"value": "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:-CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+CMSParallelRemarkEnabled"
},
{
"desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Hard Limit",
"name": "rm_memory_hard_limit",
"value": "-1"
},
{
"desc": "The period to review when computing unexpected exits.",
"display_name": "Unexpected Exits Monitoring Period",
"name": "unexpected_exits_window",
"value": "5"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>yarn-site.xml</strong> for this role only.",
"display_name": "JobHistory Server Advanced Configuration Snippet (Safety Valve) for yarn-site.xml",
"name": "jobhistory_config_safety_valve",
"value": null
},
{
"desc": "For advanced use only, a string to be inserted into <strong>log4j.properties</strong> for this role only.",
"display_name": "JobHistory Server Logging Advanced Configuration Snippet (Safety Valve)",
"name": "log4j_safety_valve",
"value": null
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.",
"display_name": "Log Directory Free Space Monitoring Absolute Thresholds",
"name": "log_directory_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "The port of the MapReduce JobHistory Server. Together with the host name of the JobHistory role forms the address. ",
"display_name": "MapReduce JobHistory Server Port",
"name": "mapreduce_jobhistory_address",
"value": "10020"
},
{
"desc": "The group that the JobHistory Server process should run as.",
"display_name": "System Group",
"name": "history_process_groupname",
"value": "hadoop"
},
{
"desc": "Job history files older than this time duration will deleted when the history cleaner runs.",
"display_name": "Job History Files Maximum Age",
"name": "mapreduce_jobhistory_max_age_ms",
"value": "604800000"
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Health Alerts for this Role",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "The maximum size, in megabytes, per log file for JobHistory Server logs. Typically used by log4j.",
"display_name": "JobHistory Server Max Log Size",
"name": "max_log_size",
"value": "200"
},
{
"desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.",
"display_name": "Kill When Out of Memory",
"name": "oom_sigkill_enabled",
"value": "true"
},
{
"desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.",
"display_name": "Dump Heap When Out of Memory",
"name": "oom_heap_dump_enabled",
"value": "false"
},
{
"desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.",
"display_name": "File Descriptor Monitoring Thresholds",
"name": "jobhistory_fd_thresholds",
"value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}"
},
{
"desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.",
"display_name": "Unexpected Exits Thresholds",
"name": "unexpected_exits_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "The port of the MapReduce JobHistory Server web application. Together with the host name of the JobHistory role forms the address.",
"display_name": "MapReduce JobHistory Webapp Port",
"name": "mapreduce_jobhistory_webapp_address",
"value": "19888"
},
{
"desc": "Enables the health test that the Cloudera Manager Agent can successfully contact and gather metrics from the web server.",
"display_name": "Web Metric Collection",
"name": "jobhistory_web_metric_collection_enabled",
"value": "true"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "The minimum log level for JobHistory Server logs",
"display_name": "JobHistory Server Logging Threshold",
"name": "log_threshold",
"value": "INFO"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>mapred-site.xml</strong> for this role only.",
"display_name": "JobHistory Server Advanced Configuration Snippet (Safety Valve) for mapred-site.xml",
"name": "jobhistory_mapred_safety_valve",
"value": null
},
{
"desc": "If enabled, the JobHistory Server binds to the wildcard address (\"0.0.0.0\") on all of its ports.",
"display_name": "Bind JobHistory Server to Wildcard Address",
"name": "yarn_jobhistory_bind_wildcard",
"value": "false"
},
{
"desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup I/O Weight",
"name": "rm_io_weight",
"value": "500"
},
{
"desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup CPU Shares",
"name": "rm_cpu_shares",
"value": "1024"
},
{
"desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.",
"display_name": "Heap Dump Directory",
"name": "oom_heap_dump_dir",
"value": "/tmp"
},
{
"desc": "The user that the JobHistory Server process should run as.",
"display_name": "System User",
"name": "history_process_username",
"value": "mapred"
},
{
"desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.",
"display_name": "Maximum Process File Descriptors",
"name": "rlimit_fds",
"value": null
},
{
"desc": "The health test thresholds for the weighted average time spent in Java garbage collection. Specified as a percentage of elapsed wall clock time.",
"display_name": "Garbage Collection Duration Thresholds",
"name": "jobhistory_gc_duration_thresholds",
"value": "{\"critical\":\"60.0\",\"warning\":\"30.0\"}"
},
{
"desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Soft Limit",
"name": "rm_memory_soft_limit",
"value": "-1"
}
]

View File

@ -0,0 +1,380 @@
[
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Log Directory Free Space Monitoring Percentage Thresholds",
"name": "log_directory_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "<p>This file contains the rules which govern how log messages are turned into events by the custom log4j appender that this role loads. It is in JSON format, and is composed of a list of rules. Every log message is evaluated against each of these rules in turn to decide whether or not to send an event for that message.</p><p>Each rule has some or all of the following fields:</p><ul><li><span class='code'>alert</span> - whether or not events generated from this rule should be promoted to alerts. A value of \"true\" will cause alerts to be generated. If not specified, the default is \"false\".</li><li><span class='code'>rate</span> <strong>(mandatory)</strong> - the maximum number of log messages matching this rule that may be sent as events every minute. If more than <tt>rate</tt> matching log messages are received in a single minute, the extra messages are ignored. If rate is less than 0, the number of messages per minute is unlimited.</li><li><span class='code'>periodminutes</span> - the number of minutes during which the publisher will only publish <tt>rate</tt> events or fewer. If not specified, the default is <strong>one minute</strong></li><li><span class='code'>threshold</span> - apply this rule only to messages with this log4j severity level or above. An example is \"WARN\" for warning level messages or higher.</li><li><span class='code'>content</span> - match only those messages whose contents match this regular expression.</li><li><span class='code'>exceptiontype</span> - match only those messages which are part of an exception message. The exception type must match this regular expression.</li></ul><br/><p>Example:<span class='code'>{\"alert\": false, \"rate\": 10, \"exceptiontype\": \"java.lang.StringIndexOutOfBoundsException\"}</span></p><p>This rule will send events to Cloudera Manager for every <span class='code'>StringIndexOutOfBoundsException</span>, up to a maximum of 10 every minute.</p>",
"display_name": "Rules to Extract Events from Log Files",
"name": "log_event_whitelist",
"value": "{\n \"version\": \"0\",\n \"rules\": [\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"FATAL\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Instead, use .*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Use .* instead\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 2, \"exceptiontype\": \".*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\"}\n ]\n}\n"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>yarn-site.xml</strong> for this role only.",
"display_name": "NodeManager Advanced Configuration Snippet (Safety Valve) for yarn-site.xml",
"name": "nodemanager_config_safety_valve",
"value": null
},
{
"desc": "The address of the NodeManager web application.",
"display_name": "NodeManager Web Application Address",
"name": "nodemanager_webserver_port",
"value": "8042"
},
{
"desc": "If enabled, adds 'org.apache.hadoop.mapred.ShuffleHandler' to the NodeManager auxiliary services. This is required for MapReduce applications.",
"display_name": "Enable Shuffle Auxiliary Service",
"name": "mapreduce_aux_service",
"value": "true"
},
{
"desc": "Heartbeat interval to ResourceManager",
"display_name": "Heartbeat Interval",
"name": "yarn_nodemanager_heartbeat_interval_ms",
"value": "1000"
},
{
"desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup I/O Weight",
"name": "rm_io_weight",
"value": "500"
},
{
"desc": "Enables the health test that the NodeManager's process state is consistent with the role configuration",
"display_name": "NodeManager Process Health Test",
"name": "nodemanager_scm_health_enabled",
"value": "true"
},
{
"desc": "The amount of time to wait for the NodeManager to fully start up and connect to the ResourceManager before enforcing the connectivity check.",
"display_name": "NodeManager Connectivity Tolerance at Startup",
"name": "nodemanager_connectivity_tolerance_seconds",
"value": "180"
},
{
"desc": "List of directories on the local filesystem where a NodeManager stores intermediate data files.",
"display_name": "NodeManager Local Directory List",
"name": "yarn_nodemanager_local_dirs",
"value": null
},
{
"desc": "Enables the health check that verifies the NodeManager is connected to the ResourceManager",
"display_name": "NodeManager Connectivity Health Check",
"name": "nodemanager_connectivity_health_enabled",
"value": "true"
},
{
"desc": "Number of seconds after an application finishes before the NodeManager's DeletionService will delete the application's localized file and log directory. To diagnose YARN application problems, set this property's value large enough (for example, to 600 = 10 minutes) to permit examination of these directories.",
"display_name": "Localized Dir Deletion Delay",
"name": "yarn_nodemanager_delete_debug_delay_sec",
"value": "0"
},
{
"desc": "The minimum Linux user ID allowed. Used to prevent other super users.",
"display_name": "Minimum User ID",
"name": "container_executor_min_user_id",
"value": "1000"
},
{
"desc": "Number of threads to use for localization fetching.",
"display_name": "Localizer Fetch Thread Count",
"name": "yarn_nodemanager_localizer_fetch_thread_count",
"value": "4"
},
{
"desc": "Target size of localizer cache in MB, per local directory.",
"display_name": "Localizer Cache Target Size",
"name": "yarn_nodemanager_localizer_cache_target_size_mb",
"value": "10240"
},
{
"desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Java Configuration Options for NodeManager",
"name": "node_manager_java_opts",
"value": "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:-CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+CMSParallelRemarkEnabled"
},
{
"desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.",
"display_name": "Automatically Restart Process",
"name": "process_auto_restart",
"value": "true"
},
{
"desc": "<p>The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><span class='code'>triggerName</span> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific role. </li><li><span class='code'>triggerExpression</span> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. <li><span class='code'>streamThreshold</span> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. <li><span class='code'>enabled</span> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</p><p>For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:red\",\n \"streamThreshold\": 0}, \"enabled\": \"true\"]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Role Triggers",
"name": "role_triggers",
"value": "[]"
},
{
"desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Soft Limit",
"name": "rm_memory_soft_limit",
"value": "-1"
},
{
"desc": "List of users explicitly whitelisted to be allowed to run containers. Users with IDs lower than the \"Minimum User Id\" setting may be whitelisted by using this setting.",
"display_name": "Allowed System Users",
"name": "container_executor_allowed_system_users",
"value": "nobody,impala,hive,llama"
},
{
"desc": "Directory where NodeManager will place its log files.",
"display_name": "NodeManager Log Directory",
"name": "node_manager_log_dir",
"value": "/var/log/hadoop-yarn"
},
{
"desc": "The system group that owns the container-executor binary. This does not need to be changed unless the ownership of the binary is explicitly changed.",
"display_name": "Container Executor Group",
"name": "container_executor_group",
"value": "yarn"
},
{
"desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Hard Limit",
"name": "rm_memory_hard_limit",
"value": "-1"
},
{
"desc": "The period to review when computing unexpected exits.",
"display_name": "Unexpected Exits Monitoring Period",
"name": "unexpected_exits_window",
"value": "5"
},
{
"desc": "The health test thresholds for the weighted average time spent in Java garbage collection. Specified as a percentage of elapsed wall clock time.",
"display_name": "Garbage Collection Duration Thresholds",
"name": "nodemanager_gc_duration_thresholds",
"value": "{\"critical\":\"60.0\",\"warning\":\"30.0\"}"
},
{
"desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.",
"display_name": "File Descriptor Monitoring Thresholds",
"name": "nodemanager_fd_thresholds",
"value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}"
},
{
"desc": "Advanced Configuration Snippet (Safety Valve) for Hadoop Metrics2. Properties will be inserted into <strong>hadoop-metrics2.properties</strong>.",
"display_name": "Hadoop Metrics2 Advanced Configuration Snippet (Safety Valve)",
"name": "hadoop_metrics2_safety_valve",
"value": null
},
{
"desc": "Environment variables that containers may override rather than use NodeManager's default.",
"display_name": "Containers Environment Variables Whitelist ",
"name": "yarn_nodemanager_env_whitelist",
"value": "JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,HADOOP_YARN_HOME"
},
{
"desc": "The address of the NodeManager IPC.",
"display_name": "NodeManager IPC Address",
"name": "yarn_nodemanager_address",
"value": "8041"
},
{
"desc": "Environment variables that should be forwarded from the NodeManager's environment to the container's.",
"display_name": "Containers Environment Variable",
"name": "yarn_nodemanager_admin_env",
"value": "MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX"
},
{
"desc": "The period to review when computing the moving average of garbage collection time.",
"display_name": "Garbage Collection Duration Monitoring Period",
"name": "nodemanager_gc_duration_window",
"value": "5"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>log4j.properties</strong> for this role only.",
"display_name": "NodeManager Logging Advanced Configuration Snippet (Safety Valve)",
"name": "log4j_safety_valve",
"value": null
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.",
"display_name": "Log Directory Free Space Monitoring Absolute Thresholds",
"name": "log_directory_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "HDFS directory where application logs are stored when an application completes.",
"display_name": "Remote App Log Directory",
"name": "yarn_nodemanager_remote_app_log_dir",
"value": "/tmp/logs"
},
{
"desc": "Time in seconds to retain user logs. Only applicable if log aggregation is disabled.",
"display_name": "Log Retain Duration",
"name": "yarn_nodemanager_log_retain_seconds",
"value": "10800"
},
{
"desc": "Number of threads used in cleanup.",
"display_name": "Cleanup Thread Count",
"name": "yarn_nodemanager_delete_thread_count",
"value": "4"
},
{
"desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.",
"display_name": "Maximum Process File Descriptors",
"name": "rlimit_fds",
"value": null
},
{
"desc": "Number of threads container manager uses.",
"display_name": "Container Manager Thread Count",
"name": "yarn_nodemanager_container_manager_thread_count",
"value": "20"
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Health Alerts for this Role",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "Address where the localizer IPC is.",
"display_name": "Localizer Port",
"name": "yarn_nodemanager_localizer_address",
"value": "8040"
},
{
    "desc": "The interval, in milliseconds, at which the localizer cache cleanup runs.",
"display_name": "Localizer Cache Cleanup Interval",
"name": "yarn_nodemanager_localizer_cache_cleanup_interval_ms",
"value": "600000"
},
{
"desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.",
"display_name": "Kill When Out of Memory",
"name": "oom_sigkill_enabled",
"value": "true"
},
{
"desc": "Absolute path to the script which is periodically run by the node health monitoring service to determine if the node is healthy or not. If the value of this key is empty or the file does not exist in the location configured here, the node health monitoring service is not started.",
"display_name": "Healthchecker Script Path",
"name": "mapred_healthchecker_script_path",
"value": ""
},
{
"desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.",
"display_name": "Dump Heap When Out of Memory",
"name": "oom_heap_dump_enabled",
"value": "false"
},
{
"desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.",
"display_name": "Unexpected Exits Thresholds",
"name": "unexpected_exits_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "When computing the overall NodeManager health, consider the host's health.",
"display_name": "NodeManager Host Health Test",
"name": "nodemanager_host_health_enabled",
"value": "true"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "Path (rooted in the cgroups hierarchy on the machine) where to place YARN-managed cgroups.",
"display_name": "CGroups Hierarchy",
"name": "linux_container_executor_cgroups_hierarchy",
"value": "/hadoop-yarn"
},
{
"desc": "Enables the health test that the Cloudera Manager Agent can successfully contact and gather metrics from the web server.",
"display_name": "Web Metric Collection",
"name": "nodemanager_web_metric_collection_enabled",
"value": "true"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>mapred-site.xml</strong> for this role only.",
"display_name": "NodeManager Advanced Configuration Snippet (Safety Valve) for mapred-site.xml",
"name": "nodemanager_mapred_safety_valve",
"value": null
},
{
"desc": "The maximum size, in megabytes, per log file for NodeManager logs. Typically used by log4j.",
"display_name": "NodeManager Max Log Size",
"name": "max_log_size",
"value": "200"
},
{
"desc": "The remote log dir will be created at {yarn.nodemanager.remote-app-log-dir}/${user}/{thisParam}",
"display_name": "Remote App Log Directory Suffix",
"name": "yarn_nodemanager_remote_app_log_dir_suffix",
"value": "logs"
},
{
"desc": "Number of threads to handle localization requests.",
"display_name": "Localizer Client Thread Count",
"name": "yarn_nodemanager_localizer_client_thread_count",
"value": "5"
},
{
"desc": "The maximum number of rolled log files to keep for NodeManager logs. Typically used by log4j.",
"display_name": "NodeManager Maximum Log File Backups",
"name": "max_log_backup_index",
"value": "10"
},
{
"desc": "Amount of physical memory, in MiB, that can be allocated for containers.",
"display_name": "Container Memory",
"name": "yarn_nodemanager_resource_memory_mb",
"value": "8192"
},
{
"desc": "The health test thresholds on the duration of the metrics request to the web server.",
"display_name": "Web Metric Collection Duration",
"name": "nodemanager_web_metric_collection_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"10000.0\"}"
},
{
"desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup CPU Shares",
"name": "rm_cpu_shares",
"value": "1024"
},
{
"desc": "Number of virtual CPU cores that can be allocated for containers. This parameter has no effect prior to CDH 4.4.",
"display_name": "Container Virtual CPU Cores",
"name": "yarn_nodemanager_resource_cpu_vcores",
"value": "8"
},
{
"desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.",
"display_name": "Heap Dump Directory",
"name": "oom_heap_dump_dir",
"value": "/tmp"
},
{
"desc": "Comma-separated list of arguments which are to be passed to node health script when it is being launched.",
"display_name": "Healthchecker Script Arguments",
"name": "mapred_healthchecker_script_args",
"value": ""
},
{
"desc": "List of directories on the local filesystem where a NodeManager stores container log files.",
"display_name": "NodeManager Container Log Directories",
"name": "yarn_nodemanager_log_dirs",
"value": "/var/log/hadoop-yarn/container"
},
{
"desc": "The minimum log level for NodeManager logs",
"display_name": "NodeManager Logging Threshold",
"name": "log_threshold",
"value": "INFO"
},
{
"desc": "Maximum size for the Java Process heap memory. Passed to Java -Xmx. Measured in bytes.",
"display_name": "Java Heap Size of NodeManager in Bytes",
"name": "node_manager_java_heapsize",
"value": "1073741824"
}
]

View File

@ -0,0 +1,440 @@
[
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Log Directory Free Space Monitoring Percentage Thresholds",
"name": "log_directory_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.",
"display_name": "File Descriptor Monitoring Thresholds",
"name": "resourcemanager_fd_thresholds",
"value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}"
},
{
"desc": "<p>This file contains the rules which govern how log messages are turned into events by the custom log4j appender that this role loads. It is in JSON format, and is composed of a list of rules. Every log message is evaluated against each of these rules in turn to decide whether or not to send an event for that message.</p><p>Each rule has some or all of the following fields:</p><ul><li><span class='code'>alert</span> - whether or not events generated from this rule should be promoted to alerts. A value of \"true\" will cause alerts to be generated. If not specified, the default is \"false\".</li><li><span class='code'>rate</span> <strong>(mandatory)</strong> - the maximum number of log messages matching this rule that may be sent as events every minute. If more than <tt>rate</tt> matching log messages are received in a single minute, the extra messages are ignored. If rate is less than 0, the number of messages per minute is unlimited.</li><li><span class='code'>periodminutes</span> - the number of minutes during which the publisher will only publish <tt>rate</tt> events or fewer. If not specified, the default is <strong>one minute</strong></li><li><span class='code'>threshold</span> - apply this rule only to messages with this log4j severity level or above. An example is \"WARN\" for warning level messages or higher.</li><li><span class='code'>content</span> - match only those messages whose contents match this regular expression.</li><li><span class='code'>exceptiontype</span> - match only those messages which are part of an exception message. The exception type must match this regular expression.</li></ul><br/><p>Example:<span class='code'>{\"alert\": false, \"rate\": 10, \"exceptiontype\": \"java.lang.StringIndexOutOfBoundsException\"}</span></p><p>This rule will send events to Cloudera Manager for every <span class='code'>StringIndexOutOfBoundsException</span>, up to a maximum of 10 every minute.</p>",
"display_name": "Rules to Extract Events from Log Files",
"name": "log_event_whitelist",
"value": "{\n \"version\": \"0\",\n \"rules\": [\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"FATAL\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Instead, use .*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Use .* instead\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 2, \"exceptiontype\": \".*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\"}\n ]\n}\n"
},
{
"desc": "Enter an XML string that will be inserted verbatim into the Fair Scheduler allocations file. For CDH5, overrides the configuration set using the Pools configuration UI. For CDH4, this is the only way to configure the Fair Scheduler for YARN.",
"display_name": "Fair Scheduler XML Advanced Configuration Snippet (Safety Valve)",
"name": "resourcemanager_fair_scheduler_configuration",
"value": null
},
{
"desc": "For advanced use only, a string to be inserted into <strong>nodes_allow.txt</strong> for this role only.",
"display_name": "ResourceManager Advanced Configuration Snippet (Safety Valve) for nodes_allow.txt",
"name": "rm_hosts_allow_safety_valve",
"value": null
},
{
"desc": "The maximum number that an ApplicationMaster will retry.",
"display_name": "ApplicationMaster Max Retries",
"name": "yarn_resourcemanager_am_max_retries",
"value": "1"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>nodes_exclude.txt</strong> for this role only.",
"display_name": "ResourceManager Advanced Configuration Snippet (Safety Valve) for nodes_exclude.txt",
"name": "rm_hosts_exclude_safety_valve",
"value": null
},
{
"desc": "The health test thresholds on the duration of the metrics request to the web server.",
"display_name": "Web Metric Collection Duration",
"name": "resourcemanager_web_metric_collection_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"10000.0\"}"
},
{
"desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup I/O Weight",
"name": "rm_io_weight",
"value": "500"
},
{
"desc": "The maximum number of completed applications that the ResourceManager keeps.",
"display_name": "Max Completed Applications",
"name": "yarn_resourcemanager_max_completed_applications",
"value": "10000"
},
{
"desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Hard Limit",
"name": "rm_memory_hard_limit",
"value": "-1"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>yarn-site.xml</strong> for this role only.",
"display_name": "ResourceManager Advanced Configuration Snippet (Safety Valve) for yarn-site.xml",
"name": "resourcemanager_config_safety_valve",
"value": null
},
{
"desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Java Configuration Options for ResourceManager",
"name": "resource_manager_java_opts",
"value": "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:-CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+CMSParallelRemarkEnabled"
},
{
"desc": "Enables the health test that the ResourceManager's process state is consistent with the role configuration",
"display_name": "ResourceManager Process Health Test",
"name": "resourcemanager_scm_health_enabled",
"value": "true"
},
{
"desc": "Directory where ResourceManager will place its log files.",
"display_name": "ResourceManager Log Directory",
"name": "resource_manager_log_dir",
"value": "/var/log/hadoop-yarn"
},
{
"desc": "The smallest amount of physical memory, in MiB, that can be requested for a container. If using the Capacity or FIFO scheduler (or any scheduler, prior to CDH 5), memory requests will be rounded up to the nearest multiple of this number.",
"display_name": "Container Memory Minimum",
"name": "yarn_scheduler_minimum_allocation_mb",
"value": "1024"
},
{
"desc": "The number of threads used to handle applications manager requests.",
"display_name": "Client Thread Count",
"name": "yarn_resourcemanager_client_thread_count",
"value": "50"
},
{
"desc": "The periodic interval that the ResourceManager will check whether containers are still alive.",
"display_name": "Container Monitor Interval",
"name": "yarn_resourcemanager_container_liveness_monitor_interval_ms",
"value": "600000"
},
{
"desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.",
"display_name": "Automatically Restart Process",
"name": "process_auto_restart",
"value": "false"
},
{
"desc": "The expiry interval to wait until a NodeManager is considered dead.",
"display_name": "NodeManager Monitor Expiry",
"name": "yarn_nm_liveness_monitor_expiry_interval_ms",
"value": "600000"
},
{
"desc": "The largest amount of physical memory, in MiB, that can be requested for a container.",
"display_name": "Container Memory Maximum",
"name": "yarn_scheduler_maximum_allocation_mb",
"value": "65536"
},
{
"desc": "The largest number of virtual CPU cores that can be requested for a container. This parameter has no effect prior to CDH 4.4.",
"display_name": "Container Virtual CPU Cores Maximum",
"name": "yarn_scheduler_maximum_allocation_vcores",
"value": "32"
},
{
"desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Soft Limit",
"name": "rm_memory_soft_limit",
"value": "-1"
},
{
"desc": "Enables the health test that the Cloudera Manager Agent can successfully contact and gather metrics from the web server.",
"display_name": "Web Metric Collection",
"name": "resourcemanager_web_metric_collection_enabled",
"value": "true"
},
{
"desc": "Number of threads to handle resource tracker calls.",
"display_name": "Resource Tracker Thread Count",
"name": "yarn_resourcemanager_resource_tracker_client_thread_count",
"value": "50"
},
{
"desc": "If using the Fair Scheduler, virtual core requests will be rounded up to the nearest multiple of this number. This parameter has no effect prior to CDH 5.",
"display_name": "Container Virtual CPU Cores Increment",
"name": "yarn_scheduler_increment_allocation_vcores",
"value": "1"
},
{
"desc": "The address of the scheduler interface in the ResourceManager.",
"display_name": "Scheduler Address",
"name": "yarn_resourcemanager_scheduler_address",
"value": "8030"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>mapred-site.xml</strong> for this role only.",
"display_name": "ResourceManager Advanced Configuration Snippet (Safety Valve) for mapred-site.xml",
"name": "resourcemanager_mapred_safety_valve",
"value": null
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.",
"display_name": "Log Directory Free Space Monitoring Absolute Thresholds",
"name": "log_directory_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "The address of the ResourceManager web application.",
"display_name": "Web Application Address",
"name": "resourcemanager_webserver_port",
"value": "8088"
},
{
"desc": "The period to review when computing the moving average of garbage collection time.",
"display_name": "Garbage Collection Duration Monitoring Period",
"name": "resourcemanager_gc_duration_window",
"value": "5"
},
{
"desc": "For applications that request containers on particular racks, the number of scheduling opportunities since the last container assignment to wait before accepting a placement on another rack. Expressed as a float between 0 and 1, which, as a fraction of the cluster size, is the number of scheduling opportunities to pass up. If not set, this means don't pass up any scheduling opportunities. Requires Fair Scheduler continuous scheduling to be disabled. If continuous scheduling is enabled, yarn.scheduler.fair.locality-delay-rack-ms should be used instead.",
"display_name": "Fair Scheduler Rack Locality Threshold",
"name": "resourcemanager_fair_scheduler_locality_threshold_rack",
"value": null
},
{
"desc": "The period to review when computing unexpected exits.",
"display_name": "Unexpected Exits Monitoring Period",
"name": "unexpected_exits_window",
"value": "5"
},
{
"desc": "Advanced Configuration Snippet (Safety Valve) for Hadoop Metrics2. Properties will be inserted into <strong>hadoop-metrics2.properties</strong>.",
"display_name": "Hadoop Metrics2 Advanced Configuration Snippet (Safety Valve)",
"name": "hadoop_metrics2_safety_valve",
"value": null
},
{
"desc": "The address of the applications manager interface in the ResourceManager.",
"display_name": "ResourceManager Address",
"name": "yarn_resourcemanager_address",
"value": "8032"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>log4j.properties</strong> for this role only.",
"display_name": "ResourceManager Logging Advanced Configuration Snippet (Safety Valve)",
"name": "log4j_safety_valve",
"value": null
},
{
"desc": "Enables multiple Fair Scheduler container assignments in one heartbeat, which improves cluster throughput when there are many small tasks to run.",
"display_name": "Fair Scheduler Assign Multiple Tasks",
"name": "resourcemanager_fair_scheduler_assign_multiple",
"value": "false"
},
{
"desc": "The health test thresholds for the weighted average time spent in Java garbage collection. Specified as a percentage of elapsed wall clock time.",
"display_name": "Garbage Collection Duration Thresholds",
"name": "resourcemanager_gc_duration_thresholds",
"value": "{\"critical\":\"60.0\",\"warning\":\"30.0\"}"
},
{
"desc": "When enabled, any applications that were running on the cluster when the ResourceManager died will be recovered when the ResourceManager next starts. <strong>Note:</strong> If RM-HA is enabled, then this configuration is always enabled.",
"display_name": "Enable ResourceManager Recovery",
"name": "yarn_resourcemanager_recovery_enabled",
"value": "false"
},
{
"desc": "The periodic interval that the ResourceManager will check whether ApplicationMasters is still alive.",
"display_name": "ApplicationMaster Monitor Interval",
"name": "yarn_resourcemanager_amliveliness_monitor_interval_ms",
"value": "1000"
},
{
"desc": "For applications that request containers on particular nodes, the minimum time in milliseconds the Fair Scheduler waits before accepting a placement on another node. Requires Fair Scheduler continuous scheduling to be enabled. If continuous scheduling is disabled, yarn.scheduler.fair.locality.threshold.node should be used instead.",
"display_name": "Fair Scheduler Node Locality Delay",
"name": "yarn_scheduler_fair_locality_delay_node_ms",
"value": "2000"
},
{
"desc": "Number of threads used to handle the ResourceManager admin interface.",
"display_name": "Admin Client Thread Count",
"name": "yarn_resourcemanager_admin_client_thread_count",
"value": "1"
},
{
"desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.",
"display_name": "Maximum Process File Descriptors",
"name": "rlimit_fds",
"value": null
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Health Alerts for this Role",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "Maximum size for the Java Process heap memory. Passed to Java -Xmx. Measured in bytes.",
"display_name": "Java Heap Size of ResourceManager in Bytes",
"name": "resource_manager_java_heapsize",
"value": "1073741824"
},
{
"desc": "The maximum size, in megabytes, per log file for ResourceManager logs. Typically used by log4j.",
"display_name": "ResourceManager Max Log Size",
"name": "max_log_size",
"value": "200"
},
{
"desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.",
"display_name": "Kill When Out of Memory",
"name": "oom_sigkill_enabled",
"value": "true"
},
{
"desc": "When enabled, if a pool's minimum share is not met for some period of time, the Fair Scheduler preempts applications in other pools. Preemption guarantees that production applications are not starved while also allowing the cluster to be used for experimental and research applications. To minimize wasted computation, the Fair Scheduler preempts the most recently launched applications.",
"display_name": "Fair Scheduler Preemption",
"name": "resourcemanager_fair_scheduler_preemption",
"value": "false"
},
{
"desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.",
"display_name": "Dump Heap When Out of Memory",
"name": "oom_heap_dump_enabled",
"value": "false"
},
{
"desc": "The smallest number of virtual CPU cores that can be requested for a container. If using the Capacity or FIFO scheduler (or any scheduler, prior to CDH 5), virtual core requests will be rounded up to the nearest multiple of this number. This parameter has no effect prior to CDH 4.4.",
"display_name": "Container Virtual CPU Cores Minimum",
"name": "yarn_scheduler_minimum_allocation_vcores",
"value": "1"
},
{
"desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.",
"display_name": "Unexpected Exits Thresholds",
"name": "unexpected_exits_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "If using the Fair Scheduler, memory requests will be rounded up to the nearest multiple of this number. This parameter has no effect prior to CDH 5.",
"display_name": "Container Memory Increment",
"name": "yarn_scheduler_increment_allocation_mb",
"value": "512"
},
{
"desc": "<p>The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><span class='code'>triggerName</span> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific role. </li><li><span class='code'>triggerExpression</span> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. <li><span class='code'>streamThreshold</span> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. <li><span class='code'>enabled</span> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</p><p>For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:red\",\n \"streamThreshold\": 0}, \"enabled\": \"true\"]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Role Triggers",
"name": "role_triggers",
"value": "[]"
},
{
"desc": "If enabled, the ResourceManager binds to the wildcard address (\"0.0.0.0\") on all of its ports.",
"display_name": "Bind ResourceManager to Wildcard Address",
"name": "yarn_rm_bind_wildcard",
"value": "false"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "Enable continuous scheduling in the Fair Scheduler. When enabled, scheduling decisions are decoupled from NodeManager heartbeats, leading to faster resource allocations.",
"display_name": "Enable Fair Scheduler Continuous Scheduling",
"name": "yarn_scheduler_fair_continuous_scheduling_enabled",
"value": "true"
},
{
"desc": "When computing the overall ResourceManager health, consider the host's health.",
"display_name": "ResourceManager Host Health Test",
"name": "resourcemanager_host_health_enabled",
"value": "true"
},
{
"desc": "The number of threads used to handle requests through the scheduler interface.",
"display_name": "Scheduler Thread Count",
"name": "yarn_resourcemanager_scheduler_client_thread_count",
"value": "50"
},
{
"desc": "The maximum number of rolled log files to keep for ResourceManager logs. Typically used by log4j.",
"display_name": "ResourceManager Maximum Log File Backups",
"name": "max_log_backup_index",
"value": "10"
},
{
"desc": "When enabled, the Fair Scheduler will assign shares to individual apps based on their size, rather than providing an equal share to all apps regardless of size.",
"display_name": "Fair Scheduler Size-Based Weight",
"name": "resourcemanager_fair_scheduler_size_based_weight",
"value": "false"
},
{
"desc": "For applications that request containers on particular nodes, the number of scheduling opportunities since the last container assignment to wait before accepting a placement on another node. Expressed as a float between 0 and 1, which, as a fraction of the cluster size, is the number of scheduling opportunities to pass up. If not set, this means don't pass up any scheduling opportunities. Requires Fair Scheduler continuous scheduling to be disabled. If continuous scheduling is enabled, yarn.scheduler.fair.locality-delay-node-ms should be used instead.",
"display_name": "Fair Scheduler Node Locality Threshold",
"name": "resourcemanager_fair_scheduler_locality_threshold_node",
"value": null
},
{
"desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup CPU Shares",
"name": "rm_cpu_shares",
"value": "1024"
},
{
"desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.",
"display_name": "Heap Dump Directory",
"name": "oom_heap_dump_dir",
"value": "/tmp"
},
{
"desc": "For applications that request containers on particular racks, the minimum time in milliseconds the Fair Scheduler waits before accepting a placement on another rack. Requires Fair Scheduler continuous scheduling to be enabled. If continuous scheduling is disabled, yarn.scheduler.fair.locality.threshold.rack should be used instead.",
"display_name": "Fair Scheduler Rack Locality Delay",
"name": "yarn_scheduler_fair_locality_delay_rack_ms",
"value": "4000"
},
{
"desc": "When set to <b>true</b>, the Fair Scheduler uses the username as the default pool name, in the event that a pool name is not specified. When set to <b>false</b>, all applications are run in a shared pool, called <b>default</b>.",
"display_name": "Fair Scheduler User As Default Queue",
"name": "resourcemanager_fair_scheduler_user_as_default_queue",
"value": "true"
},
{
"desc": "The address of the resource tracker interface in the ResourceManager.",
"display_name": "Resource Tracker Address",
"name": "yarn_resourcemanager_resource_tracker_address",
"value": "8031"
},
{
"desc": "The expiry interval to wait until an ApplicationMaster is considered dead.",
"display_name": "ApplicationMaster Monitor Expiry",
"name": "yarn_am_liveness_monitor_expiry_interval_ms",
"value": "600000"
},
{
"desc": "The class to use as the resource scheduler. FairScheduler is only supported in CDH 4.2.1 and later.",
"display_name": "Scheduler Class",
"name": "yarn_resourcemanager_scheduler_class",
"value": "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler"
},
{
"desc": "The minimum log level for ResourceManager logs",
"display_name": "ResourceManager Logging Threshold",
"name": "log_threshold",
"value": "INFO"
},
{
"desc": "The amount of time allowed after this role is started that failures of health checks that rely on communication with this role will be tolerated.",
"display_name": "Health Check Startup Tolerance",
"name": "resourcemanager_startup_tolerance_minutes",
"value": "5"
},
{
"desc": "The address of the admin interface in the ResourceManager.",
"display_name": "Administration Address",
"name": "yarn_resourcemanager_admin_address",
"value": "8033"
},
{
"desc": "The periodic interval that the ResourceManager will check whether NodeManagers are still alive.",
"display_name": "NodeManager Monitor Interval",
"name": "yarn_resourcemanager_nm_liveness_monitor_interval_ms",
"value": "1000"
},
{
"desc": "Enter an XML string that represents the Capacity Scheduler configuration.",
"display_name": "Capacity Scheduler Configuration",
"name": "resourcemanager_capacity_scheduler_configuration",
"value": "<?xml version=\"1.0\"?>\n<configuration>\n <property>\n <name>yarn.scheduler.capacity.root.queues</name>\n <value>default</value>\n </property>\n <property>\n <name>yarn.scheduler.capacity.root.capacity</name>\n <value>100</value>\n </property>\n <property>\n <name>yarn.scheduler.capacity.root.default.capacity</name>\n <value>100</value>\n </property>\n</configuration>\n"
}
]

View File

@ -0,0 +1,284 @@
[
{
"desc": "ACL that determines which users and groups can submit and kill applications in any pool, and can issue commands on ResourceManager roles.",
"display_name": "Admin ACL",
"name": "yarn_admin_acl",
"value": "*"
},
{
"desc": "The health test thresholds of the overall NodeManager health. The check returns \"Concerning\" health if the percentage of \"Healthy\" NodeManagers falls below the warning threshold. The check is unhealthy if the total percentage of \"Healthy\" and \"Concerning\" NodeManagers falls below the critical threshold.",
"display_name": "Healthy NodeManager Monitoring Thresholds",
"name": "yarn_nodemanagers_healthy_thresholds",
"value": "{\"critical\":\"90.0\",\"warning\":\"95.0\"}"
},
{
"desc": "Comma-delimited list of hosts where you want to allow the HDFS user to impersonate other users. The default '*' allows all hosts. To disable entirely, use a string that doesn't correspond to a host name, such as '_no_host'.",
"display_name": "HDFS Proxy User Hosts",
"name": "hdfs_proxy_user_hosts_list",
"value": "*"
},
{
"desc": "JSON representation of all the configurations that the Fair Scheduler can take on across all schedules. Typically edited using the Pools configuration UI.",
"display_name": "Fair Scheduler Allocations",
"name": "yarn_fs_scheduled_allocations",
"value": "{\"defaultMinSharePreemptionTimeout\":null,\"defaultQueueSchedulingPolicy\":null,\"fairSharePreemptionTimeout\":null,\"queueMaxAppsDefault\":null,\"queuePlacementRules\":null,\"queues\":[{\"aclAdministerApps\":null,\"aclSubmitApps\":null,\"minSharePreemptionTimeout\":null,\"name\":\"root\",\"queues\":[{\"aclAdministerApps\":null,\"aclSubmitApps\":null,\"minSharePreemptionTimeout\":null,\"name\":\"default\",\"queues\":[],\"schedulablePropertiesList\":[{\"impalaMaxMemory\":null,\"impalaMaxQueuedQueries\":null,\"impalaMaxRunningQueries\":null,\"maxResources\":null,\"maxRunningApps\":null,\"minResources\":null,\"scheduleName\":\"default\",\"weight\":null}],\"schedulingPolicy\":null}],\"schedulablePropertiesList\":[{\"impalaMaxMemory\":null,\"impalaMaxQueuedQueries\":null,\"impalaMaxRunningQueries\":null,\"maxResources\":null,\"maxRunningApps\":null,\"minResources\":null,\"scheduleName\":\"default\",\"weight\":null}],\"schedulingPolicy\":null}],\"userMaxAppsDefault\":null,\"users\":[]}"
},
{
"desc": "Controls which applications non-admin users can see in the applications list view",
"display_name": "Non-Admin Users Applications List Visibility Settings",
"name": "user_application_list_settings",
"value": "ALL"
},
{
"desc": "The amount of time after ResourceManager(s) start that the lack of an active ResourceManager will be tolerated. This is an advanced option that does not often need to be changed.",
"display_name": "ResourceManager Activation Startup Tolerance",
"name": "yarn_resourcemanager_activation_startup_tolerance",
"value": "180"
},
{
"desc": "A list specifying the rules to run to determine which Fair Scheduler configuration to use. Typically edited using the Rules configuration UI.",
"display_name": "Fair Scheduler Configuration Rules",
"name": "yarn_fs_schedule_rules",
"value": "[]"
},
{
"desc": "Entries to add to the classpaths of YARN applications.",
"display_name": "YARN Application Classpath",
"name": "yarn_application_classpath",
"value": "$HADOOP_CLIENT_CONF_DIR,$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*"
},
{
"desc": "Comma-delimited list of hosts where you want to allow the Oozie user to impersonate other users. The default '*' allows all hosts. To disable entirely, use a string that doesn't correspond to a host name, such as '_no_host'.",
"display_name": "Oozie Proxy User Hosts",
"name": "oozie_proxy_user_hosts_list",
"value": "*"
},
{
"desc": "The user that this service's processes should run as. (Except the Job History server, which has its own user)",
"display_name": "System User",
"name": "process_username",
"value": "yarn"
},
{
"desc": "Comma-delimited list of groups that you want to allow the Hue user to impersonate. The default '*' allows all groups. To disable entirely, use a string that doesn't correspond to a group name, such as '_no_group_'.",
"display_name": "Hue Proxy User Groups",
"name": "hue_proxy_user_groups_list",
"value": "*"
},
{
"desc": "The home directory of the system user on the local filesystem. This setting must reflect the system's configured value - only changing it here will not change the actual home directory.",
"display_name": "System User's Home Directory",
"name": "hdfs_user_home_dir",
"value": "/var/lib/hadoop-yarn"
},
{
"desc": "Whether YARN creates a cgroup per container, thereby isolating the CPU usage of containers. When set, <tt>yarn.nodemanager.linux-container-executor.resources-handler.class</tt> is configured to <tt>org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler</tt>. The host (in Cloudera Manager) must have cgroups enabled. The number of shares allocated to all YARN containers is configured by adjusting the CPU shares value of the Node Manager in the Resource Management configuration group.",
"display_name": "Use CGroups for Resource Management",
"name": "yarn_service_cgroups",
"value": "false"
},
{
"desc": "Name of the HDFS service that this YARN service instance depends on",
"display_name": "HDFS Service",
"name": "hdfs_service",
"value": null
},
{
"desc": "Cluster ID used when ResourceManager is Highly Available.",
"display_name": "RM-HA Cluster ID",
"name": "yarn_rm_ha_cluster_id",
"value": "yarnRM"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>yarn-site.xml</strong>. Applies to configurations of all roles in this service except client configuration.",
"display_name": "YARN Service Advanced Configuration Snippet (Safety Valve) for yarn-site.xml",
"name": "yarn_service_config_safety_valve",
"value": null
},
{
"desc": "Comma-delimited list of hosts where you want to allow the Hue user to impersonate other users. The default '*' allows all hosts. To disable entirely, use a string that doesn't correspond to a host name, such as '_no_host'.",
"display_name": "Hue Proxy User Hosts",
"name": "hue_proxy_user_hosts_list",
"value": "*"
},
{
"desc": "The tolerance window used in YARN service tests that depend on detection of the active ResourceManager.",
"display_name": "Active ResourceManager Detection Window",
"name": "yarn_active_resourcemanager_detecton_window",
"value": "3"
},
{
"desc": "Name of the ZooKeeper service that this YARN service instance depends on",
"display_name": "ZooKeeper Service",
"name": "zookeeper_service",
"value": null
},
{
"desc": "Enables authentication for hadoop HTTP web-consoles for all roles of this service. <b>Note:</b> This is effective only if security is enabled for the HDFS service.",
"display_name": "Enable Authentication for HTTP Web-Consoles",
"name": "hadoop_secure_web_ui",
"value": "false"
},
{
"desc": "Comma-delimited list of groups that you want to allow the HDFS user to impersonate. The default '*' allows all groups. To disable entirely, use a string that doesn't correspond to a group name, such as '_no_group_'.",
"display_name": "HDFS Proxy User Groups",
"name": "hdfs_proxy_user_groups_list",
"value": "*"
},
{
"desc": "When computing the overall YARN service health, whether to consider the active ResourceManager's health.",
"display_name": "Active ResourceManager Role Health Check",
"name": "yarn_resourcemanagers_health_enabled",
"value": "true"
},
{
"desc": "When set to <b>true</b>, pools specified in applications but not explicitly configured, are created at runtime with default settings. When set to <b>false</b>, applications specifying pools not explicitly configured run in a pool named <b>default</b>. This setting applies when an application explicitly specifies a pool and when the application runs in a pool named with the username associated with the application.",
"display_name": "Allow Undeclared Pools",
"name": "yarn_scheduler_fair_allow_undeclared_pools",
"value": "true"
},
{
"desc": "When computing the overall YARN health, consider JobHistory Server's health",
"display_name": "JobHistory Server Role Health Test",
"name": "yarn_jobhistoryserver_health_enabled",
"value": "true"
},
{
"desc": "Comma-delimited list of groups that you want to allow the Hive user to impersonate. The default '*' allows all groups. To disable entirely, use a string that doesn't correspond to a group name, such as '_no_group_'.",
"display_name": "Hive Proxy User Groups",
"name": "hive_proxy_user_groups_list",
"value": "*"
},
{
"desc": "How long to keep aggregation logs before deleting them.",
"display_name": "Log Aggregation Retention Period",
"name": "yarn_log_aggregation_retain_seconds",
"value": "604800"
},
{
"desc": "When computing the overall YARN service health, whether to consider the health of the standby ResourceManager.",
"display_name": "Standby ResourceManager Health Check",
"name": "yarn_standby_resourcemanager_health_enabled",
"value": "true"
},
{
"desc": "Whether the Oozie superuser can impersonate the members of a comma-delimited list of groups. The default '*' allows all groups. To disable entirely, use a string that doesn't correspond to a group name, such as '_no_group_'.",
"display_name": "Oozie Proxy User Groups",
"name": "oozie_proxy_user_groups_list",
"value": "*"
},
{
"desc": "Whether users and groups specified in Admin ACL should be checked for authorization to perform admin operations.",
"display_name": "Enable ResourceManager ACLs",
"name": "yarn_acl_enable",
"value": "true"
},
{
"desc": "The group that this service's processes should run as. (Except the Job History server, which has its own group)",
"display_name": "System Group",
"name": "process_groupname",
"value": "hadoop"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>hadoop-policy.xml</strong>. Applies to configurations of all roles in this service except client configuration.",
"display_name": "YARN Service Advanced Configuration Snippet (Safety Valve) for hadoop-policy.xml",
"name": "yarn_hadoop_policy_config_safety_valve",
"value": null
},
{
"desc": "Comma-delimited list of hosts where you want to allow the Hive user to impersonate other users. The default '*' allows all hosts. To disable entirely, use a string that doesn't correspond to a host name, such as '_no_host'.",
"display_name": "Hive Proxy User Hosts",
"name": "hive_proxy_user_hosts_list",
"value": "*"
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this service reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Service Level Health Alerts",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "The frequency in which the log4j event publication appender will retry sending undelivered log events to the Event server, in seconds",
"display_name": "Log Event Retry Frequency",
"name": "log_event_retry_frequency",
"value": "30"
},
{
"desc": "<p>The configured triggers for this service. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><span class='code'>triggerName</span> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific service. </li><li><span class='code'>triggerExpression</span> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. <li><span class='code'>streamThreshold</span> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. <li><span class='code'>enabled</span> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</p><p>For example, here is a JSON formatted trigger that fires if there are more than 10 DataNodes with more than 500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleType = DataNode and last(fd_open) > 500) DO health:red\",\n \"streamThreshold\": 10, \"enabled\": \"true\"}]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Service Triggers",
"name": "service_triggers",
"value": "[]"
},
{
"desc": "Controls which applications an admin user can see in the applications list view",
"display_name": "Admin Users Applications List Visibility Settings",
"name": "admin_application_list_settings",
"value": "ALL"
},
{
"desc": "Whether YARN uses the Linux Container Executor both in secure (Kerberos) and insecure (not Kerberos) environments. Cgroups enforcement only works when the Linux Container Executor is used.",
"display_name": "Always Use Linux Container Executor",
"name": "yarn_service_lce_always",
"value": "false"
},
{
"desc": "When set, each role will identify important log events and forward them to Cloudera Manager.",
"display_name": "Enable Log Event Capture",
"name": "catch_events",
"value": "true"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "Comma-delimited list of groups that you want to allow the Llama (AM for Impala) user to impersonate. The default '*' allows all groups. To disable entirely, use a string that doesn't correspond to a group name, such as '_no_group_'.",
"display_name": "Llama Proxy User Groups",
"name": "llama_proxy_user_groups_list",
"value": "*"
},
{
"desc": "Controls the aggregate metrics generated for YARN applications. The structure is a JSON list of the attributes to aggregate and the entities to aggregate to. For example, if the attributeName is 'maps_completed' and the aggregationTargets is ['USER'] then the Service Monitor will create the metric 'yarn_application_maps_completed_rate' and, every ten minutes, will record the total maps completed for each user across all their YARN applications. By default it will also record the number of applications submitted ('apps_submitted_rate') for both users and pool. For a full list of the supported attributes see the YARN search page. Note that the valid aggregation targets are USER, YARN_POOL, and YARN (the service), and that these aggregate metrics can be viewed on both the reports and charts search pages.",
"display_name": "YARN Application Aggregates",
"name": "yarn_application_aggregates",
"value": "[\n {\n \"attributeName\": \"maps_total\",\n \"aggregationTargets\": [\"USER\", \"YARN_POOL\", \"YARN\"]\n },\n {\n \"attributeName\": \"reduces_total\",\n \"aggregationTargets\": [\"USER\", \"YARN_POOL\", \"YARN\"]\n },\n {\n \"attributeName\": \"cpu_milliseconds\",\n \"aggregationTargets\": [\"USER\", \"YARN_POOL\", \"YARN\"]\n },\n {\n \"attributeName\": \"mb_millis_maps\",\n \"aggregationTargets\": [\"USER\", \"YARN_POOL\", \"YARN\"]\n },\n {\n \"attributeName\": \"mb_millis_reduces\",\n \"aggregationTargets\": [\"USER\", \"YARN_POOL\", \"YARN\"]\n },\n {\n \"attributeName\": \"vcores_millis_maps\",\n \"aggregationTargets\": [\"USER\", \"YARN_POOL\", \"YARN\"]\n },\n {\n \"attributeName\": \"vcores_millis_reduces\",\n \"aggregationTargets\": [\"USER\", \"YARN_POOL\", \"YARN\"]\n },\n {\n \"attributeName\": \"file_bytes_read\",\n \"aggregationTargets\": [\"USER\", \"YARN_POOL\", \"YARN\"]\n },\n {\n \"attributeName\": \"file_bytes_written\",\n \"aggregationTargets\": [\"USER\", \"YARN_POOL\", \"YARN\"]\n },\n {\n \"attributeName\": \"hdfs_bytes_read\",\n \"aggregationTargets\": [\"USER\", \"YARN_POOL\", \"YARN\"]\n },\n {\n \"attributeName\": \"hdfs_bytes_written\",\n \"aggregationTargets\": [\"USER\", \"YARN_POOL\", \"YARN\"]\n }\n]\n"
},
{
"desc": "Comma-delimited list of hosts where you want to allow the Llama (AM for Impala) user to impersonate other users. The default '*' allows all hosts. To disable entirely, use a string that doesn't correspond to a host name, such as '_no_host'.",
"display_name": "Llama Proxy User Hosts",
"name": "llama_proxy_user_hosts_list",
"value": "*"
},
{
"desc": "UNIX user that containers run as when Linux-container-executor is used in nonsecure mode.",
"display_name": "UNIX User for Nonsecure Mode with Linux Container Executor",
"name": "yarn_nodemanager_linux_container_executor_nonsecure_mode_local_user",
"value": "nobody"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>mapred-site.xml</strong>. Applies to configurations of all roles in this service except client configuration.",
"display_name": "YARN Service MapReduce Advanced Configuration Snippet (Safety Valve)",
"name": "yarn_service_mapred_safety_valve",
"value": null
},
{
"desc": "For advanced use only, key-value pairs (one on each line) to be inserted into a role's environment. Applies to configurations of all roles in this service except client configuration.",
"display_name": "YARN Service Environment Advanced Configuration Snippet (Safety Valve)",
"name": "yarn_service_env_safety_valve",
"value": null
},
{
"desc": "Whether to enable log aggregation",
"display_name": "Enable Log Aggregation",
"name": "yarn_log_aggregation_enable",
"value": "true"
},
{
"desc": "For advanced use only, a list of derived configuration properties that will be used by the Service Monitor instead of the default ones.",
"display_name": "Service Monitor Derived Configs Advanced Configuration Snippet (Safety Valve)",
"name": "smon_derived_configs_safety_valve",
"value": null
}
]

View File

@ -0,0 +1,48 @@
# Copyright (c) 2014 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sahara.plugins.general import utils as u
def get_manager(cluster):
    """Return the instance running the Cloudera Manager process."""
    return u.get_instance(cluster, 'MANAGER')
def get_namenode(cluster):
    """Return the instance running the HDFS NameNode process."""
    # Consistency fix: use single quotes like every other helper in this
    # module (string value is unchanged).
    return u.get_instance(cluster, 'NAMENODE')
def get_resourcemanager(cluster):
    """Return the instance running the YARN ResourceManager process."""
    return u.get_instance(cluster, 'RESOURCEMANAGER')
def get_nodemanagers(cluster):
    """Return all instances running the YARN NodeManager process."""
    return u.get_instances(cluster, 'NODEMANAGER')
def get_oozie(cluster):
    """Return the instance running the Oozie server process."""
    return u.get_instance(cluster, 'OOZIE_SERVER')
def get_datanodes(cluster):
    """Return all instances running the HDFS DataNode process."""
    return u.get_instances(cluster, 'DATANODE')
def get_secondarynamenode(cluster):
    """Return the instance running the HDFS SecondaryNameNode process."""
    return u.get_instance(cluster, 'SECONDARYNAMENODE')
def get_historyserver(cluster):
    """Return the instance running the MapReduce JobHistory server."""
    return u.get_instance(cluster, 'JOBHISTORY')

View File

@ -0,0 +1,125 @@
# Copyright (c) 2014 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sahara.i18n import _
from sahara.i18n import _LE
from sahara.openstack.common import log as logging
from sahara.plugins.cdh import cloudera_utils as cmu
from sahara.plugins.cdh import utils as cu
from sahara.plugins.general import exceptions as ex
from sahara.plugins.general import utils as u
from sahara.utils import general as gu
LOG = logging.getLogger(__name__)
def validate_cluster_creating(cluster):
    """Validate cluster topology before provisioning starts.

    Verifies that the cm_api client library is importable and that the
    requested node process counts form a consistent CDH cluster.

    :param cluster: cluster object to validate
    :raises HadoopProvisionError: if the cm_api library is not installed
    :raises InvalidComponentCountException: if a process has an invalid
        instance count
    :raises RequiredServiceMissingException: if a process is requested
        without a process it depends on
    """
    if not cmu.have_cm_api_libs():
        # Bug fix: a space was missing between the two string fragments,
        # so the log message rendered as "...plugin install'cm_api'...".
        LOG.error(_LE("For provisioning cluster with CDH plugin install "
                      "'cm_api' package version 6.0.2 or later."))
        raise ex.HadoopProvisionError(_("'cm_api' is not installed."))

    # Exactly one Cloudera Manager node is required.
    mng_count = _get_inst_count(cluster, 'MANAGER')
    if mng_count != 1:
        raise ex.InvalidComponentCountException('MANAGER', 1, mng_count)

    # HDFS requires exactly one NameNode and one SecondaryNameNode.
    nn_count = _get_inst_count(cluster, 'NAMENODE')
    if nn_count != 1:
        raise ex.InvalidComponentCountException('NAMENODE', 1, nn_count)

    snn_count = _get_inst_count(cluster, 'SECONDARYNAMENODE')
    if snn_count != 1:
        raise ex.InvalidComponentCountException('SECONDARYNAMENODE', 1,
                                                snn_count)

    # YARN is optional, but at most one ResourceManager and at most one
    # JobHistory server may be requested.
    rm_count = _get_inst_count(cluster, 'RESOURCEMANAGER')
    if rm_count not in [0, 1]:
        raise ex.InvalidComponentCountException('RESOURCEMANAGER', '0 or 1',
                                                rm_count)

    hs_count = _get_inst_count(cluster, 'JOBHISTORY')
    if hs_count not in [0, 1]:
        raise ex.InvalidComponentCountException('JOBHISTORY', '0 or 1',
                                                hs_count)

    if rm_count > 0 and hs_count < 1:
        raise ex.RequiredServiceMissingException('JOBHISTORY',
                                                 required_by='RESOURCEMANAGER')

    # NodeManagers are useless without a ResourceManager.
    nm_count = _get_inst_count(cluster, 'NODEMANAGER')
    if rm_count == 0:
        if nm_count > 0:
            raise ex.RequiredServiceMissingException('RESOURCEMANAGER',
                                                     required_by='NODEMANAGER')

    oo_count = _get_inst_count(cluster, 'OOZIE_SERVER')
    dn_count = _get_inst_count(cluster, 'DATANODE')
    if oo_count not in [0, 1]:
        raise ex.InvalidComponentCountException('OOZIE_SERVER', '0 or 1',
                                                oo_count)

    if oo_count == 1:
        # Oozie needs working HDFS, YARN and a JobHistory server to run
        # and track workflow jobs.
        if dn_count < 1:
            raise ex.RequiredServiceMissingException(
                'DATANODE', required_by='OOZIE_SERVER')

        if nm_count < 1:
            raise ex.RequiredServiceMissingException(
                'NODEMANAGER', required_by='OOZIE_SERVER')

        if hs_count != 1:
            raise ex.RequiredServiceMissingException(
                'JOBHISTORY', required_by='OOZIE_SERVER')
def validate_additional_ng_scaling(cluster, additional):
    """Validate node groups added to the cluster during scaling.

    :param cluster: cluster being scaled
    :param additional: ids of the node groups being added
    :raises NodeGroupCannotBeScaled: if a new node group contains a
        non-scalable process, or a NodeManager is added to a cluster
        without a ResourceManager
    """
    resourcemanager = cu.get_resourcemanager(cluster)
    allowed = _get_scalable_processes()

    for group_id in additional:
        group = gu.get_by_id(cluster.node_groups, group_id)
        processes = group.node_processes

        if not set(processes).issubset(allowed):
            msg = _("CDH plugin cannot scale nodegroup with processes: "
                    "%(processes)s")
            raise ex.NodeGroupCannotBeScaled(
                group.name, msg % {'processes': ' '.join(processes)})

        if 'NODEMANAGER' in processes and not resourcemanager:
            msg = _("CDH plugin cannot scale node group with processes "
                    "which have no master-processes run in cluster")
            raise ex.NodeGroupCannotBeScaled(group.name, msg)
def validate_existing_ng_scaling(cluster, existing):
    """Validate the resize of existing node groups.

    :param cluster: cluster being scaled
    :param existing: dict mapping node group id to its new instance count
    :raises NodeGroupCannotBeScaled: if a resized node group contains a
        process that does not support scaling
    """
    scalable_processes = _get_scalable_processes()
    dn_to_delete = 0
    for ng in cluster.node_groups:
        if ng.id in existing:
            # Bug fix: process names are upper-case ('DATANODE' everywhere
            # else in this module), so the original lower-case "datanode"
            # membership test never matched and dn_to_delete stayed 0.
            if ng.count > existing[ng.id] and 'DATANODE' in ng.node_processes:
                dn_to_delete += ng.count - existing[ng.id]
            # NOTE(review): dn_to_delete is accumulated but not used yet;
            # presumably reserved for a replication-factor check — TODO
            # confirm and wire it up.
            if not set(ng.node_processes).issubset(scalable_processes):
                msg = _("CDH plugin cannot scale nodegroup with processes: "
                        "%(processes)s")
                raise ex.NodeGroupCannotBeScaled(
                    ng.name, msg % {'processes': ' '.join(ng.node_processes)})
def _get_scalable_processes():
return ['DATANODE', 'NODEMANAGER']
def _get_inst_count(cluster, process):
    """Return the total instance count of *process* across node groups."""
    groups = u.get_node_groups(cluster, process)
    return sum(ng.count for ng in groups)

View File

@ -37,6 +37,19 @@ def load_hadoop_xml_defaults(file_name):
return configs
def parse_hadoop_xml_with_name_and_value(data):
    """Parse a Hadoop-style XML config document.

    :param data: XML string containing <property> elements
    :returns: list of {'name': ..., 'value': ...} dicts, one per
        <property> element
    """
    doc = xml.parseString(data)
    return [
        {
            'name': _get_text_from_node(prop, 'name'),
            'value': _get_text_from_node(prop, 'value'),
        }
        for prop in doc.getElementsByTagName('property')
    ]
def _get_node_element(element, name):
element = element.getElementsByTagName(name)
return element[0] if element and element[0].hasChildNodes() else None

View File

@ -38,6 +38,7 @@ console_scripts =
sahara.cluster.plugins =
vanilla = sahara.plugins.vanilla.plugin:VanillaProvider
hdp = sahara.plugins.hdp.ambariplugin:AmbariPlugin
cdh = sahara.plugins.cdh.plugin:CDHPluginProvider
fake = sahara.plugins.fake.plugin:FakePluginProvider
spark = sahara.plugins.spark.plugin:SparkProvider