Merge "Add Hive support to CDH plugin"

Jenkins 2014-10-22 20:38:19 +00:00 committed by Gerrit Code Review
commit bec8aa5ca4
15 changed files with 1062 additions and 10 deletions

View File: MANIFEST.in

@@ -12,6 +12,7 @@ recursive-include sahara/locale *
include sahara/plugins/cdh/resources/cdh_config.py
include sahara/plugins/cdh/resources/*.sh
include sahara/plugins/cdh/resources/*.json
include sahara/plugins/cdh/resources/*.sql
include sahara/plugins/vanilla/hadoop2/resources/*.sh
include sahara/plugins/vanilla/hadoop2/resources/*.sql
include sahara/plugins/vanilla/hadoop2/resources/*.template

View File: sahara/plugins/cdh/cloudera_utils.py

@@ -33,6 +33,7 @@ CM_DEFAULT_PASSWD = 'admin'
HDFS_SERVICE_NAME = 'hdfs01'
YARN_SERVICE_NAME = 'yarn01'
OOZIE_SERVICE_NAME = 'oozie01'
HIVE_SERVICE_NAME = 'hive01'
def have_cm_api_libs():
@@ -93,6 +94,8 @@ def get_service(process, cluster=None, instance=None):
return cm_cluster.get_service(YARN_SERVICE_NAME)
elif process in ['OOZIE_SERVER']:
return cm_cluster.get_service(OOZIE_SERVICE_NAME)
elif process in ['HIVESERVER2', 'HIVEMETASTORE', 'WEBHCAT']:
return cm_cluster.get_service(HIVE_SERVICE_NAME)
else:
raise ValueError(
_("Process %(process)s is not supported by CDH plugin") %
@@ -133,17 +136,20 @@ def update_configs(instance):
def get_role_name(instance, service):
# NOTE: role name must match regexp "[_A-Za-z][-_A-Za-z0-9]{0,63}"
shortcuts = {
'ALERTPUBLISHER': 'AP',
'DATANODE': 'DN',
'EVENTSERVER': 'ES',
'HIVEMETASTORE': 'HVM',
'HIVESERVER2': 'HVS',
'HOSTMONITOR': 'HM',
'JOBHISTORY': 'JS',
'NAMENODE': 'NN',
'NODEMANAGER': 'NM',
'OOZIE_SERVER': 'OS',
'RESOURCEMANAGER': 'RM',
'SECONDARYNAMENODE': 'SNN',
'SERVICEMONITOR': 'SM',
'WEBHCAT': 'WHC'
}
return '%s_%s' % (shortcuts.get(service, service),
instance.hostname().replace('-', '_'))
@@ -195,3 +201,14 @@ def create_oozie_db(oozie_service):
@cloudera_cmd
def install_oozie_sharelib(oozie_service):
yield oozie_service.install_oozie_sharelib()
@cloudera_cmd
def create_hive_metastore_db(hive_service):
yield hive_service.create_hive_metastore_tables()
@cloudera_cmd
def create_hive_dirs(hive_service):
yield hive_service.create_hive_userdir()
yield hive_service.create_hive_warehouse()
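These two helpers follow the same pattern as create_oozie_db and install_oozie_sharelib above: each yields cm_api command objects, and the module's cloudera_cmd decorator drives them to completion. A minimal sketch of that decorator, assuming it matches the one already defined earlier in cloudera_utils.py:

import functools

from sahara.plugins import exceptions as ex


def cloudera_cmd(f):
    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        # Each yielded object is a cm_api ApiCommand; wait() blocks until
        # Cloudera Manager reports that the command has finished.
        for cmd in f(*args, **kwargs):
            result = cmd.wait()
            if not result.success:
                raise ex.HadoopProvisionError(result.resultMessage)
    return wrapper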

View File: sahara/plugins/cdh/commands.py

@@ -41,6 +41,11 @@ def is_pre_installed_cdh(remote):
def start_cloudera_db(remote):
_root(remote, 'service cloudera-scm-server-db start')
# Allow the 'hive' role md5-authenticated access to the 'metastore'
# database from any address (pg_hba.conf fields: TYPE DATABASE USER
# ADDRESS METHOD)
hive_access_param = 'host metastore hive 0.0.0.0/0 md5'
remote.append_to_file('/var/lib/cloudera-scm-server-db/data/pg_hba.conf',
hive_access_param, run_as_root=True)
_root(remote, 'service cloudera-scm-server-db restart')
def start_manager(remote):
@@ -93,3 +98,7 @@ def add_apt_key(remote, key_url):
def add_centos_repository(r, repo_list_url, repo_name):
push_remote_file(r, repo_list_url, '/etc/yum.repos.d/%s.repo' % repo_name)
def start_mysql_server(remote):
_root(remote, 'service mysql start')

View File: sahara/plugins/cdh/config_helper.py

@@ -91,6 +91,10 @@ nodemanager_confs = _load_json(path_to_config + 'yarn-nodemanager.json')
jobhistory_confs = _load_json(path_to_config + 'yarn-jobhistory.json')
oozie_service_confs = _load_json(path_to_config + 'oozie-service.json')
oozie_role_confs = _load_json(path_to_config + 'oozie-oozie.json')
hive_service_confs = _load_json(path_to_config + 'hive-service.json')
hive_metastore_confs = _load_json(path_to_config + 'hive-metastore.json')
hive_hiveserver_confs = _load_json(path_to_config + 'hive-hiveserver2.json')
hive_webhcat_confs = _load_json(path_to_config + 'hive-webhcat.json')
priority_one_confs = _load_json(path_to_config + 'priority-one-confs.json')
@@ -126,6 +130,10 @@ def _get_ng_plugin_configs():
cfg += _init_configs(jobhistory_confs, 'JOBHISTORY', 'node')
cfg += _init_configs(oozie_service_confs, 'OOZIE', 'cluster')
cfg += _init_configs(oozie_role_confs, 'OOZIE', 'node')
cfg += _init_configs(hive_service_confs, 'HIVE', 'cluster')
cfg += _init_configs(hive_metastore_confs, 'HIVEMETASTORE', 'node')
cfg += _init_configs(hive_hiveserver_confs, 'HIVESERVER', 'node')
cfg += _init_configs(hive_webhcat_confs, 'WEBHCAT', 'node')
return cfg

View File: sahara/plugins/cdh/db_helper.py

@@ -0,0 +1,53 @@
# Copyright (c) 2014 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import uuid
import six
from sahara import conductor
from sahara import context
from sahara.utils import files
conductor = conductor.API
def get_hive_db_password(cluster):
ctx = context.ctx()
cluster = conductor.cluster_get(ctx, cluster.id)
passwd = cluster.extra.get('hive_db_password') if cluster.extra else None
if passwd:
return passwd
passwd = six.text_type(uuid.uuid4())
extra = cluster.extra.to_dict() if cluster.extra else {}
extra['hive_db_password'] = passwd
cluster = conductor.cluster_update(ctx, cluster, {'extra': extra})
return passwd
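Because the generated password is persisted in the cluster's extra field via the conductor, repeated calls are stable. A short usage sketch (the cluster object is whatever the conductor returns):

passwd1 = get_hive_db_password(cluster)
passwd2 = get_hive_db_password(cluster)
assert passwd1 == passwd2  # read back from cluster.extra, not regenerated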
def create_hive_database(cluster, remote):
db_password = get_hive_db_password(cluster)
create_db_script = files.get_file_text(
'plugins/cdh/resources/create_hive_db.sql')
create_db_script = create_db_script % db_password
script_name = 'create_hive_db.sql'
remote.write_file_to(script_name, create_db_script)
psql_cmd = ('PGPASSWORD=$(sudo head -1 /var/lib/cloudera-scm-server-db'
'/data/generated_password.txt) psql -U cloudera-scm '
'-h localhost -p 7432 -d scm -f %s') % script_name
remote.execute_command(psql_cmd)
remote.execute_command('rm %s' % script_name)

View File: sahara/plugins/cdh/deploy.py

@@ -25,6 +25,7 @@ from sahara.openstack.common import log as logging
from sahara.plugins.cdh import cloudera_utils as cu
from sahara.plugins.cdh import commands as cmd
from sahara.plugins.cdh import config_helper as c_helper
from sahara.plugins.cdh import db_helper
from sahara.plugins.cdh import utils as pu
from sahara.plugins import exceptions as ex
from sahara.plugins import utils as gu
@@ -38,6 +39,7 @@ CDH_VERSION = 'CDH5'
HDFS_SERVICE_TYPE = 'HDFS'
YARN_SERVICE_TYPE = 'YARN'
OOZIE_SERVICE_TYPE = 'OOZIE'
HIVE_SERVICE_TYPE = 'HIVE'
PATH_TO_CORE_SITE_XML = '/etc/hadoop/conf/core-site.xml'
HADOOP_LIB_DIR = '/usr/lib/hadoop-mapreduce'
@@ -54,6 +56,8 @@ PACKAGES = [
'hadoop-mapreduce-historyserver',
'hadoop-yarn-nodemanager',
'hadoop-yarn-resourcemanager',
'hive-metastore',
'hive-server2',
'oozie',
'oracle-j2sdk1.7',
]
@@ -113,6 +117,19 @@ def _get_configs(service, cluster=None, node_group=None):
all_confs = _merge_dicts(all_confs, ng_default_confs)
if cluster:
hive_confs = {
'HIVE': {
'hive_metastore_database_type': 'postgresql',
'hive_metastore_database_host':
pu.get_manager(cluster).internal_ip,
'hive_metastore_database_port': '7432',
'hive_metastore_database_password':
db_helper.get_hive_db_password(cluster),
'mapreduce_yarn_service': cu.YARN_SERVICE_NAME
}
}
all_confs = _merge_dicts(all_confs, hive_confs)
all_confs = _merge_dicts(all_confs, cluster.cluster_configs)
return all_confs.get(service, {})
@@ -310,6 +327,8 @@ def _create_services(cluster):
cm_cluster.create_service(cu.HDFS_SERVICE_NAME, HDFS_SERVICE_TYPE)
cm_cluster.create_service(cu.YARN_SERVICE_NAME, YARN_SERVICE_TYPE)
cm_cluster.create_service(cu.OOZIE_SERVICE_NAME, OOZIE_SERVICE_TYPE)
if pu.get_hive_metastore(cluster):
cm_cluster.create_service(cu.HIVE_SERVICE_NAME, HIVE_SERVICE_TYPE)
def _configure_services(cluster):
@@ -324,6 +343,10 @@ def _configure_services(cluster):
oozie = cm_cluster.get_service(cu.OOZIE_SERVICE_NAME)
oozie.update_config(_get_configs(OOZIE_SERVICE_TYPE, cluster=cluster))
if pu.get_hive_metastore(cluster):
hive = cm_cluster.get_service(cu.HIVE_SERVICE_NAME)
hive.update_config(_get_configs(HIVE_SERVICE_TYPE, cluster=cluster))
def _configure_instances(instances):
for inst in instances:
@@ -369,6 +392,20 @@ def _configure_swift_to_inst(instance):
r.write_file_to(PATH_TO_CORE_SITE_XML, new_core_site, run_as_root=True)
def _configure_hive(cluster):
manager = pu.get_manager(cluster)
with manager.remote() as r:
db_helper.create_hive_database(cluster, r)
# Hive requires /tmp/hive-hive directory
namenode = pu.get_namenode(cluster)
with namenode.remote() as r:
r.execute_command(
'sudo su - -c "hadoop fs -mkdir -p /tmp/hive-hive" hdfs')
r.execute_command(
'sudo su - -c "hadoop fs -chown hive /tmp/hive-hive" hdfs')
def start_cluster(cluster):
cm_cluster = cu.get_cloudera_cluster(cluster)
@@ -384,3 +421,10 @@ def start_cluster(cluster):
cu.create_oozie_db(oozie)
cu.install_oozie_sharelib(oozie)
cu.start_service(oozie)
if pu.get_hive_metastore(cluster):
hive = cm_cluster.get_service(cu.HIVE_SERVICE_NAME)
_configure_hive(cluster)
cu.create_hive_metastore_db(hive)
cu.create_hive_dirs(hive)
cu.start_service(hive)

View File: sahara/plugins/cdh/plugin.py

@@ -49,7 +49,11 @@ class CDHPluginProvider(p.ProvisioningPluginBase):
"RESOURCEMANAGER": ['RESOURCEMANAGER'],
"NODEMANAGER": ['NODEMANAGER'],
"JOBHISTORY": ['JOBHISTORY'],
"OOZIE": ['OOZIE_SERVER']
"OOZIE": ['OOZIE_SERVER'],
"HIVE": [],
"HIVESERVER": ['HIVESERVER2'],
"HIVEMETASTORE": ['HIVEMETASTORE'],
"WEBHCAT": ['WEBHCAT']
}
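With this mapping in place, a cluster template can colocate the new Hive processes with the other master-side services. A hypothetical node group, for illustration only (names and counts are not part of this change):

master_ng = {
    'name': 'master',
    'count': 1,
    'node_processes': ['NAMENODE', 'RESOURCEMANAGER', 'JOBHISTORY',
                       'OOZIE_SERVER', 'HIVEMETASTORE', 'HIVESERVER2',
                       'WEBHCAT'],
}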
def get_configs(self, hadoop_version):

View File: sahara/plugins/cdh/resources/cdh_config.py

@@ -28,6 +28,7 @@ cm_password = 'admin'
hdfs_service_name = 'hdfs01'
yarn_service_name = 'yarn01'
oozie_service_name = 'oozie01'
hive_service_name = 'hive01'
def get_cm_api():
@@ -86,5 +87,8 @@ def main():
oozie = cluster.get_service(oozie_service_name)
process_service(oozie, 'oozie')
hive = cluster.get_service(hive_service_name)
process_service(hive, 'hive')
if __name__ == '__main__':
main()

View File: sahara/plugins/cdh/resources/create_hive_db.sql

@@ -0,0 +1,4 @@
CREATE ROLE hive LOGIN PASSWORD '%s';
CREATE DATABASE metastore OWNER hive encoding 'UTF8';
GRANT ALL PRIVILEGES ON DATABASE metastore TO hive;
COMMIT;

View File: sahara/plugins/cdh/resources/hive-hiveserver2.json

@@ -0,0 +1,194 @@
[
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Log Directory Free Space Monitoring Percentage Thresholds",
"name": "log_directory_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "HiveServer2 will impersonate the beeline client user when talking to other services such as Mapreduce and Hdfs.",
"display_name": "HiveServer2 Enable Impersonation",
"name": "hiveserver2_enable_impersonation",
"value": "true"
},
{
"desc": "Minimum number of worker threads in HiveServer2's thread pool",
"display_name": "Min HiveServer2 Threads",
"name": "hiveserver2_min_threads",
"value": "5"
},
{
"desc": "Port on which HiveServer2 will listen for connections.",
"display_name": "HiveServer2 Port",
"name": "hs2_thrift_address_port",
"value": "10000"
},
{
"desc": "The maximum size, in megabytes, per log file for HiveServer2 logs. Typically used by log4j.",
"display_name": "HiveServer2 Max Log Size",
"name": "max_log_size",
"value": "200"
},
{
"desc": "The maximum number of rolled log files to keep for HiveServer2 logs. Typically used by log4j.",
"display_name": "HiveServer2 Maximum Log File Backups",
"name": "max_log_backup_index",
"value": "10"
},
{
"desc": "<p>The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><code>triggerName</code> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific role. </li><li><code>triggerExpression</code> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. </li><li><code>streamThreshold</code> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. </li><li><code>enabled</code> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</li></ul></p><p>For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:bad\",\n \"streamThreshold\": 0, \"enabled\": \"true\"}]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Role Triggers",
"name": "role_triggers",
"value": "[]"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>hive-site.xml</strong> for this role only.",
"display_name": "HiveServer2 Advanced Configuration Snippet (Safety Valve) for hive-site.xml",
"name": "hive_hs2_config_safety_valve",
"value": null
},
{
"desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Soft Limit",
"name": "rm_memory_soft_limit",
"value": "-1"
},
{
"desc": "For advanced use only, key-value pairs (one on each line) to be inserted into a role's environment. Applies to configurations of this role except client configuration.",
"display_name": "HiveServer2 Environment Advanced Configuration Snippet (Safety Valve)",
"name": "hive_hs2_env_safety_valve",
"value": null
},
{
"desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Hard Limit",
"name": "rm_memory_hard_limit",
"value": "-1"
},
{
"desc": "The period to review when computing unexpected exits.",
"display_name": "Unexpected Exits Monitoring Period",
"name": "unexpected_exits_window",
"value": "5"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>log4j.properties</strong> for this role only.",
"display_name": "HiveServer2 Logging Advanced Configuration Snippet (Safety Valve)",
"name": "log4j_safety_valve",
"value": null
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.",
"display_name": "Log Directory Free Space Monitoring Absolute Thresholds",
"name": "log_directory_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "When computing the overall HiveServer2 health, consider the host's health.",
"display_name": "HiveServer2 Host Health Test",
"name": "hiveserver2_host_health_enabled",
"value": "true"
},
{
"desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.",
"display_name": "Maximum Process File Descriptors",
"name": "rlimit_fds",
"value": null
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Health Alerts for this Role",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.",
"display_name": "Automatically Restart Process",
"name": "process_auto_restart",
"value": "false"
},
{
"desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.",
"display_name": "Kill When Out of Memory",
"name": "oom_sigkill_enabled",
"value": "true"
},
{
"desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.",
"display_name": "Dump Heap When Out of Memory",
"name": "oom_heap_dump_enabled",
"value": "false"
},
{
"desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.",
"display_name": "Unexpected Exits Thresholds",
"name": "unexpected_exits_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup I/O Weight",
"name": "rm_io_weight",
"value": "500"
},
{
"desc": "Enables the health test that the HiveServer2's process state is consistent with the role configuration",
"display_name": "HiveServer2 Process Health Test",
"name": "hiveserver2_scm_health_enabled",
"value": "true"
},
{
"desc": "Maximum size in bytes for the Java Process heap memory. Passed to Java -Xmx.",
"display_name": "Java Heap Size of HiveServer2 in Bytes",
"name": "hiveserver2_java_heapsize",
"value": "268435456"
},
{
"desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup CPU Shares",
"name": "rm_cpu_shares",
"value": "1024"
},
{
"desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.",
"display_name": "Heap Dump Directory",
"name": "oom_heap_dump_dir",
"value": "/tmp"
},
{
"desc": "Directory where HiveServer2 will place its log files.",
"display_name": "HiveServer2 Log Directory",
"name": "hive_log_dir",
"value": "/var/log/hive"
},
{
"desc": "Maximum number of worker threads in HiveServer2's thread pool",
"display_name": "Max HiveServer2 Threads",
"name": "hiveserver2_max_threads",
"value": "100"
},
{
"desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.",
"display_name": "File Descriptor Monitoring Thresholds",
"name": "hiveserver2_fd_thresholds",
"value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}"
},
{
"desc": "The minimum log level for HiveServer2 logs",
"display_name": "HiveServer2 Logging Threshold",
"name": "log_threshold",
"value": "INFO"
},
{
"desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Java Configuration Options for HiveServer2",
"name": "hiveserver2_java_opts",
"value": "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:-CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+CMSParallelRemarkEnabled"
}
]

View File: sahara/plugins/cdh/resources/hive-metastore.json

@@ -0,0 +1,194 @@
[
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Log Directory Free Space Monitoring Percentage Thresholds",
"name": "log_directory_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Java Configuration Options for Hive Metastore Server",
"name": "hive_metastore_java_opts",
"value": "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:-CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+CMSParallelRemarkEnabled"
},
{
"desc": "Maximum number of worker threads in the Hive Metastore Server's thread pool",
"display_name": "Max Hive Metastore Server Threads",
"name": "hive_metastore_max_threads",
"value": "100000"
},
{
"desc": "Enables the health test that checks that basic Hive Metastore operations succeed",
"display_name": "Hive Metastore Canary Health Test",
"name": "metastore_canary_health_enabled",
"value": "true"
},
{
"desc": "Enables the health test that the Hive Metastore Server's process state is consistent with the role configuration",
"display_name": "Hive Metastore Server Process Health Test",
"name": "hivemetastore_scm_health_enabled",
"value": "true"
},
{
"desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.",
"display_name": "Automatically Restart Process",
"name": "process_auto_restart",
"value": "false"
},
{
"desc": "The maximum number of rolled log files to keep for Hive Metastore Server logs. Typically used by log4j.",
"display_name": "Hive Metastore Server Maximum Log File Backups",
"name": "max_log_backup_index",
"value": "10"
},
{
"desc": "<p>The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><code>triggerName</code> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific role. </li><li><code>triggerExpression</code> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. </li><li><code>streamThreshold</code> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. </li><li><code>enabled</code> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</li></ul></p><p>For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:bad\",\n \"streamThreshold\": 0, \"enabled\": \"true\"}]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Role Triggers",
"name": "role_triggers",
"value": "[]"
},
{
"desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Soft Limit",
"name": "rm_memory_soft_limit",
"value": "-1"
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.",
"display_name": "Log Directory Free Space Monitoring Absolute Thresholds",
"name": "log_directory_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Hard Limit",
"name": "rm_memory_hard_limit",
"value": "-1"
},
{
"desc": "Minimum number of worker threads in the Hive Metastore Server's thread pool",
"display_name": "Min Hive Metastore Server Threads",
"name": "hive_metastore_min_threads",
"value": "200"
},
{
"desc": "When computing the overall Hive Metastore Server health, consider the host's health.",
"display_name": "Hive Metastore Server Host Health Test",
"name": "hivemetastore_host_health_enabled",
"value": "true"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>log4j.properties</strong> for this role only.",
"display_name": "Hive Metastore Server Logging Advanced Configuration Snippet (Safety Valve)",
"name": "log4j_safety_valve",
"value": null
},
{
"desc": "Maximum size in bytes for the Java Process heap memory. Passed to Java -Xmx.",
"display_name": "Java Heap Size of Hive Metastore Server in Bytes",
"name": "hive_metastore_java_heapsize",
"value": "1073741824"
},
{
"desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.",
"display_name": "File Descriptor Monitoring Thresholds",
"name": "hivemetastore_fd_thresholds",
"value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}"
},
{
"desc": "Port on which Hive Metastore Server will listen for connections.",
"display_name": "Hive Metastore Server Port",
"name": "hive_metastore_port",
"value": "9083"
},
{
"desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.",
"display_name": "Maximum Process File Descriptors",
"name": "rlimit_fds",
"value": null
},
{
"desc": "For advanced use only, a string to be inserted into <strong>hive-site.xml</strong> for this role only.",
"display_name": "Hive Metastore Server Advanced Configuration Snippet (Safety Valve) for hive-site.xml",
"name": "hive_metastore_config_safety_valve",
"value": null
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "The maximum size, in megabytes, per log file for Hive Metastore Server logs. Typically used by log4j.",
"display_name": "Hive Metastore Server Max Log Size",
"name": "max_log_size",
"value": "200"
},
{
"desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.",
"display_name": "Kill When Out of Memory",
"name": "oom_sigkill_enabled",
"value": "true"
},
{
"desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.",
"display_name": "Dump Heap When Out of Memory",
"name": "oom_heap_dump_enabled",
"value": "false"
},
{
"desc": "The period to review when computing unexpected exits.",
"display_name": "Unexpected Exits Monitoring Period",
"name": "unexpected_exits_window",
"value": "5"
},
{
"desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.",
"display_name": "Unexpected Exits Thresholds",
"name": "unexpected_exits_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Health Alerts for this Role",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup I/O Weight",
"name": "rm_io_weight",
"value": "500"
},
{
"desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup CPU Shares",
"name": "rm_cpu_shares",
"value": "1024"
},
{
"desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.",
"display_name": "Heap Dump Directory",
"name": "oom_heap_dump_dir",
"value": "/tmp"
},
{
"desc": "Directory where Hive Metastore Server will place its log files.",
"display_name": "Hive Metastore Server Log Directory",
"name": "hive_log_dir",
"value": "/var/log/hive"
},
{
"desc": "The minimum log level for Hive Metastore Server logs",
"display_name": "Hive Metastore Server Logging Threshold",
"name": "log_threshold",
"value": "INFO"
},
{
"desc": "For advanced use only, key-value pairs (one on each line) to be inserted into a role's environment. Applies to configurations of this role except client configuration.",
"display_name": "Hive Metastore Server Environment Advanced Configuration Snippet (Safety Valve)",
"name": "hive_metastore_env_safety_valve",
"value": null
}
]

View File: sahara/plugins/cdh/resources/hive-service.json

@@ -0,0 +1,314 @@
[
{
"desc": "For advanced use only, a string to be inserted into <strong>hive-site.xml</strong>. Applies to configurations of all roles in this service except client configuration.",
"display_name": "Hive Service Advanced Configuration Snippet (Safety Valve) for hive-site.xml",
"name": "hive_service_config_safety_valve",
"value": null
},
{
"desc": "Directory containing auxiliary JARs used by Hive. This should be a directory location and not a classpath containing one or more JARs. This directory must be created and managed manually on Hive CLI or HiveServer2 host.",
"display_name": "Hive Auxiliary JARs Directory",
"name": "hive_aux_jars_path_dir",
"value": null
},
{
"desc": "Comma-delimited list of groups that you want to allow the Oozie user to impersonate. The default '*' allows all groups. To disable entirely, use a string that doesn't correspond to a group name, such as '_no_group_'.",
"display_name": "Oozie Proxy User Groups",
"name": "oozie_proxy_user_groups_list",
"value": "*"
},
{
"desc": "Password for Hive Metastore database",
"display_name": "Hive Metastore Database Password",
"name": "hive_metastore_database_password",
"value": ""
},
{
"desc": "Directory name where Hive Metastore's database is stored (only for Derby)",
"display_name": "Hive Metastore Derby Path",
"name": "hive_metastore_derby_path",
"value": "/var/lib/hive/cloudera_manager/derby/metastore_db"
},
{
"desc": "Hive warehouse directory is the location in HDFS where Hive's tables are stored. Note that Hive's default value for its warehouse directory is '/user/hive/warehouse'.",
"display_name": "Hive Warehouse Directory",
"name": "hive_warehouse_directory",
"value": "/user/hive/warehouse"
},
{
"desc": "SSL keystore password.",
"display_name": "Keystore Password",
"name": "hiveserver2_keystore_password",
"value": null
},
{
"desc": "<p>\nConfigures the rules for event tracking and coalescing. This feature is\nused to define equivalency between different audit events. When\nevents match, according to a set of configurable parameters, only one\nentry in the audit list is generated for all the matching events.\n</p>\n\n<p>\nTracking works by keeping a reference to events when they first appear,\nand comparing other incoming events against the \"tracked\" events according\nto the rules defined here.\n</p>\n\n<p>Event trackers are defined in a JSON object like the following:</p>\n\n<pre>\n{\n \"timeToLive\" : [integer],\n \"fields\" : [\n {\n \"type\" : [string],\n \"name\" : [string]\n }\n ]\n}\n</pre>\n\n<p>\nWhere:\n</p>\n\n<ul>\n <li>timeToLive: maximum amount of time an event will be tracked, in\n milliseconds. Must be provided. This defines how long, since it's\n first seen, an event will be tracked. A value of 0 disables tracking.</li>\n\n <li>fields: list of fields to compare when matching events against\n tracked events.</li>\n</ul>\n\n<p>\nEach field has an evaluator type associated with it. The evaluator defines\nhow the field data is to be compared. The following evaluators are\navailable:\n</p>\n\n<ul>\n <li>value: uses the field value for comparison.</li>\n\n <li>userName: treats the field value as a userNname, and ignores any\n host-specific data. This is useful for environment using Kerberos,\n so that only the principal name and realm are compared.</li>\n</ul>\n\n<p>\nThe following is the list of fields that can be used to compare Hive events:\n</p>\n\n<ul>\n <li>username: the user performing the action.</li>\n <li>ipAddress: the IP from where the request originated.</li>\n <li>operation: the Hive operation being performed.</li> \n <li>databaseName: the database affected by the operation.</li>\n <li>tableName: the table affected by the operation.</li> \n</ul>\n\n",
"display_name": "Event Tracker",
"name": "navigator_event_tracker",
"value": null
},
{
"desc": "Action to take when the audit event queue is full. Drop the event or shutdown the affected process.",
"display_name": "Queue Policy",
"name": "navigator_audit_queue_policy",
"value": "DROP"
},
{
"desc": "Comma-delimited list of hosts where you want to allow the Oozie user to impersonate other users. The default '*' allows all hosts. To disable entirely, use a string that doesn't correspond to a host name, such as '_no_host'.",
"display_name": "Oozie Proxy User Hosts",
"name": "oozie_proxy_user_hosts_list",
"value": "*"
},
{
"desc": "The user that this service's processes should run as.",
"display_name": "System User",
"name": "process_username",
"value": "hive"
},
{
"desc": "<p>Event filters are defined in a JSON object like the following:</p>\n\n<pre>\n{\n \"defaultAction\" : (\"accept\", \"discard\"),\n \"rules\" : [\n {\n \"action\" : (\"accept\", \"discard\"),\n \"fields\" : [\n {\n \"name\" : \"fieldName\",\n \"match\" : \"regex\"\n }\n ]\n }\n ]\n}\n</pre>\n\n<p>\nA filter has a default action and a list of rules, in order of precedence.\nEach rule defines an action, and a list of fields to match against the\naudit event.\n</p>\n\n<p>\nA rule is \"accepted\" if all the listed field entries match the audit\nevent. At that point, the action declared by the rule is taken.\n</p>\n\n<p>\nIf no rules match the event, the default action is taken. Actions\ndefault to \"accept\" if not defined in the JSON object.\n</p>\n\n<p>\nThe following is the list of fields that can be filtered for Hive events:\n</p>\n\n<ul>\n <li>userName: the user performing the action.</li>\n <li>ipAddress: the IP from where the request originated.</li>\n <li>operation: the Hive operation being performed.</li> \n <li>databaseName: the databaseName for the operation.</li>\n <li>tableName: the tableName for the operation.</li>\n</ul>\n",
"display_name": "Event Filter",
"name": "navigator_audit_event_filter",
"value": "{\n \"comment\" : [\n \"Default filter for Hive services.\",\n \"Discards events generated by Hive MR jobs in /tmp directory\"\n ],\n \"defaultAction\" : \"accept\",\n \"rules\" : [\n {\n \"action\" : \"discard\",\n \"fields\" : [\n { \"name\" : \"operation\", \"match\" : \"QUERY\" },\n { \"name\" : \"objectType\", \"match\" : \"DFS_DIR\"},\n { \"name\" : \"resourcePath\", \"match\" : \"/tmp/hive-(?:.+)?/hive_(?:.+)?/-mr-.*\" }\n ]\n }\n ]\n}\n"
},
{
"desc": "Max number of reducers to use. If the configuration parameter Hive Reduce Tasks is negative, Hive will limit the number of reducers to the value of this parameter.",
"display_name": "Hive Max Reducers",
"name": "hive_max_reducers",
"value": "999"
},
{
"desc": "Enable support for encrypted client-server communication using Secure Socket Layer (SSL) for HiveServer2 connections. This is only applicable to non-Kerberos environments.",
"display_name": "Enable SSL for HiveServer",
"name": "hiveserver2_enable_ssl",
"value": "false"
},
{
"desc": "Type of Hive Metastore database. Note that Derby is not recommended and Cloudera Impala does not support Derby.",
"display_name": "Hive Metastore Database Type",
"name": "hive_metastore_database_type",
"value": "mysql"
},
{
"desc": "<p>The configured triggers for this service. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><code>triggerName</code> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific service. </li><li><code>triggerExpression</code> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. </li><li><code>streamThreshold</code> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. </li><li><code>enabled</code> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</li></ul></p><p>For example, here is a JSON formatted trigger that fires if there are more than 10 DataNodes with more than 500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleType = DataNode and last(fd_open) > 500) DO health:bad\",\n \"streamThreshold\": 10, \"enabled\": \"true\"}]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Service Triggers",
"name": "service_triggers",
"value": "[]"
},
{
"desc": "Maximum number of rolled over audit logs to retain. The logs will not be deleted if they contain audit events that have not yet been propagated to Audit Server.",
"display_name": "Number of Audit Logs to Retain",
"name": "navigator_audit_log_max_backup_index",
"value": "10"
},
{
"desc": "Port number of Hive Metastore database",
"display_name": "Hive Metastore Database Port",
"name": "hive_metastore_database_port",
"value": "3306"
},
{
"desc": "Name of the ZooKeeper service that this Hive service instance depends on.",
"display_name": "ZooKeeper Service",
"name": "zookeeper_service",
"value": null
},
{
"desc": "Host name of Hive Metastore database",
"display_name": "Hive Metastore Database Host",
"name": "hive_metastore_database_host",
"value": "localhost"
},
{
"desc": "Maximum size of audit log file in MB before it is rolled over.",
"display_name": "Maximum Audit Log File Size",
"name": "navigator_audit_log_max_file_size",
"value": "100"
},
{
"desc": "Prevent Metastore operations in the event of schema version incompatibility. Consider setting this to true to reduce probability of schema corruption during Metastore operations. Note that setting this property to true will also set datanucleus.autoCreateSchema property to false and datanucleus.fixedDatastore property to true. Any values set in Cloudera Manager for these properties will be overridden.",
"display_name": "Strict Hive Metastore Schema Validation",
"name": "hive_metastore_schema_verification",
"value": "true"
},
{
"desc": "Path to the SSL keystore.",
"display_name": "Keystore File Path",
"name": "hiveserver2_keystore_path",
"value": null
},
{
"desc": "HDFS path to the global policy file for Sentry authorization. This should be a relative path (and not a full HDFS URL). The global policy file must be in Sentry policy file format.",
"display_name": "Sentry Global Policy File",
"name": "hive_sentry_provider_resource",
"value": "/user/hive/sentry/sentry-provider.ini"
},
{
"desc": "For advanced use only, key-value pairs (one on each line) to be inserted into a role's environment. Applies to configurations of all roles in this service except client configuration.",
"display_name": "Hive Service Environment Advanced Configuration Snippet (Safety Valve)",
"name": "hive_service_env_safety_valve",
"value": null
},
{
"desc": "Path to the directory where audit logs will be written. The directory will be created if it doesn't exist.",
"display_name": "Audit Log Directory",
"name": "audit_event_log_dir",
"value": "/var/log/hive/audit"
},
{
"desc": "Name of Hive Metastore database",
"display_name": "Hive Metastore Database Name",
"name": "hive_metastore_database_name",
"value": "metastore"
},
{
"desc": "The class to use in Sentry authorization for user to group mapping. Sentry authorization may be configured to use either Hadoop user to group mapping or local groups defined in the policy file. Hadoop user to group mapping may be configured in the Cloudera Manager HDFS service configuration page under the Security section.",
"display_name": "Sentry User to Group Mapping Class",
"name": "hive_sentry_provider",
"value": "org.apache.sentry.provider.file.HadoopGroupResourceAuthorizationProvider"
},
{
"desc": "Use Sentry to enable role-based, fine-grained authorization. This configuration enables Sentry using policy files. To enable Sentry using Sentry service instead, add Sentry service as a dependency to Hive service. <strong>Sentry service provides concurrent and secure access to authorization policy metadata and is the recommended option for enabling Sentry. </strong> Sentry is supported only on CDH 4.4 or later deployments. Before enabling Sentry, read the requirements and configuration steps outlined in <a class=\"bold\" href=\"http://tiny.cloudera.com/sentry-guide-cm5\" target=\"_blank\">Setting Up Hive Authorization with Sentry<i class=\"externalLink\"></i></a>.",
"display_name": "Enable Sentry Authorization using Policy Files",
"name": "sentry_enabled",
"value": "false"
},
{
"desc": "This configuration <strong>overrides</strong> the value set for Hive Proxy User Groups configuration in HDFS service for use by Hive Metastore Server. Specify a comma-delimited list of groups that you want to <strong>allow access to Hive Metastore metadata</strong> and allow the Hive user to impersonate. A value of '*' allows all groups. Default value of empty inherits the value set for Hive Proxy User Groups configuration in HDFS service.",
"display_name": "Hive Metastore Access Control and Proxy User Groups Override",
"name": "hive_proxy_user_groups_list",
"value": null
},
{
"desc": "Default number of reduce tasks per job. Usually set to a prime number close to the number of available hosts. Ignored when mapred.job.tracker is \"local\". Hadoop sets this to 1 by default, while Hive uses -1 as the default. When set to -1, Hive will automatically determine an appropriate number of reducers for each job.",
"display_name": "Hive Reduce Tasks",
"name": "hive_reduce_tasks",
"value": "-1"
},
{
"desc": "Let the table directories inherit the permission of the Warehouse or Database directory instead of being created with the permissions derived from dfs umask. This allows Impala to insert into tables created via Hive.",
"display_name": "Hive Warehouse Subdirectories Inherit Permissions",
"name": "hive_warehouse_subdir_inherit_perms",
"value": "true"
},
{
"desc": "The group that this service's processes should run as.",
"display_name": "System Group",
"name": "process_groupname",
"value": "hive"
},
{
"desc": "The health test thresholds of the overall Hive Metastore Server health. The check returns \"Concerning\" health if the percentage of \"Healthy\" Hive Metastore Servers falls below the warning threshold. The check is unhealthy if the total percentage of \"Healthy\" and \"Concerning\" Hive Metastore Servers falls below the critical threshold.",
"display_name": "Healthy Hive Metastore Server Monitoring Thresholds",
"name": "hive_hivemetastores_healthy_thresholds",
"value": "{\"critical\":\"51.0\",\"warning\":\"99.0\"}"
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this service reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Service Level Health Alerts",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "The server name used when defining privilege rules in Sentry authorization. Sentry uses this name as an alias for the Hive service. It has nothing to do with any physical server name.",
"display_name": "Server Name for Sentry Authorization",
"name": "hive_sentry_server",
"value": "server1"
},
{
"desc": "The health test thresholds of the overall HiveServer2 health. The check returns \"Concerning\" health if the percentage of \"Healthy\" HiveServer2s falls below the warning threshold. The check is unhealthy if the total percentage of \"Healthy\" and \"Concerning\" HiveServer2s falls below the critical threshold.",
"display_name": "Healthy HiveServer2 Monitoring Thresholds",
"name": "hive_hiveserver2s_healthy_thresholds",
"value": "{\"critical\":\"51.0\",\"warning\":\"99.0\"}"
},
{
"desc": "For advanced use only, a list of configuration properties that will be used by the Service Monitor instead of the current client configuration for the service.",
"display_name": "Service Monitor Client Config Overrides",
"name": "smon_client_config_overrides",
"value": "<property><name>hive.metastore.client.socket.timeout</name><value>20</value></property>"
},
{
"desc": "User for Hive Metastore database",
"display_name": "Hive Metastore Database User",
"name": "hive_metastore_database_user",
"value": "hive"
},
{
"desc": "Instead of talking to Hive Metastore Server for Metastore information, Hive clients will talk directly to the Metastore database.",
"display_name": "Bypass Hive Metastore Server",
"name": "hive_bypass_metastore_server",
"value": "false"
},
{
"desc": "Automatically create or upgrade tables in the Hive Metastore database when needed. Consider setting this to false and managing the schema manually.",
"display_name": "Auto Create and Upgrade Hive Metastore Database Schema",
"name": "hive_metastore_database_auto_create_schema",
"value": "false"
},
{
"desc": "Perform DataNucleus validation of metadata during startup. <strong>Note</strong>: when enabled, Hive will log DataNucleus warnings even though Hive will function normally.",
"display_name": "Hive Metastore Database DataNucleus Metadata Validation",
"name": "hive_metastore_database_datanucleus_metadata_validation",
"value": "false"
},
{
"desc": "For advanced use only, a string to be inserted into the client configuration for <strong>navigator.client.properties</strong>.",
"display_name": "Hive Client Advanced Configuration Snippet (Safety Valve) for navigator.client.properties",
"name": "navigator_client_config_safety_valve",
"value": null
},
{
"desc": "The health test thresholds of the overall WebHCat Server health. The check returns \"Concerning\" health if the percentage of \"Healthy\" WebHCat Servers falls below the warning threshold. The check is unhealthy if the total percentage of \"Healthy\" and \"Concerning\" WebHCat Servers falls below the critical threshold.",
"display_name": "Healthy WebHCat Server Monitoring Thresholds",
"name": "hive_webhcats_healthy_thresholds",
"value": "{\"critical\":\"51.0\",\"warning\":\"99.0\"}"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>sentry-site.xml</strong>. Applies to configurations of all roles in this service except client configuration.",
"display_name": "Hive Service Advanced Configuration Snippet (Safety Valve) for sentry-site.xml",
"name": "hive_server2_sentry_safety_valve",
"value": null
},
{
"desc": "Size per reducer. If the input size is 10GiB and this is set to 1GiB, Hive will use 10 reducers.",
"display_name": "Hive Bytes Per Reducer",
"name": "hive_bytes_per_reducer",
"value": "1073741824"
},
{
"desc": "Enable collection of audit events from the service's roles.",
"display_name": "Enable Collection",
"name": "navigator_audit_enabled",
"value": "true"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "Allows URIs when defining privileges in per-database policy files. <strong>Warning:</strong> Typically, this configuration should be disabled. Enabling it would allow database policy file owner (which is generally not Hive admin user) to grant load privileges to any directory with read access to Hive admin user, including databases controlled by other database policy files.",
"display_name": "Allow URIs in Database Policy File",
"name": "sentry_allow_uri_db_policyfile",
"value": "false"
},
{
"desc": "In unsecure mode, setting this property to true will cause the Metastore Server to execute DFS operations using the client's reported user and group permissions. Cloudera Manager will set this for all clients and servers.",
"display_name": "Set User and Group Information",
"name": "hive_set_ugi",
"value": "true"
},
{
"desc": "For advanced use only, a list of derived configuration properties that will be used by the Service Monitor instead of the default ones.",
"display_name": "Service Monitor Derived Configs Advanced Configuration Snippet (Safety Valve)",
"name": "smon_derived_configs_safety_valve",
"value": null
},
{
"desc": "MapReduce jobs are run against this service.",
"display_name": "MapReduce Service",
"name": "mapreduce_yarn_service",
"value": null
}
]

View File: sahara/plugins/cdh/resources/hive-webhcat.json

@@ -0,0 +1,182 @@
[
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Log Directory Free Space Monitoring Percentage Thresholds",
"name": "log_directory_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "Port on which WebHCat Server will listen for connections.",
"display_name": "WebHCat Server Port",
"name": "hive_webhcat_address_port",
"value": "50111"
},
{
"desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.",
"display_name": "Automatically Restart Process",
"name": "process_auto_restart",
"value": "false"
},
{
"desc": "The maximum number of rolled log files to keep for WebHCat Server logs. Typically used by log4j.",
"display_name": "WebHCat Server Maximum Log File Backups",
"name": "max_log_backup_index",
"value": "10"
},
{
"desc": "<p>The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><code>triggerName</code> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific role. </li><li><code>triggerExpression</code> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. </li><li><code>streamThreshold</code> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned will cause the condition to fire. </li><li><code>enabled</code> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</li></ul></p><p>For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:bad\",\n \"streamThreshold\": 0, \"enabled\": \"true\"}]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Role Triggers",
"name": "role_triggers",
"value": "[]"
},
{
"desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Soft Limit",
"name": "rm_memory_soft_limit",
"value": "-1"
},
{
"desc": "Maximum size in bytes for the Java Process heap memory. Passed to Java -Xmx.",
"display_name": "Java Heap Size of WebHCat Server in Bytes",
"name": "hive_webhcat_java_heapsize",
"value": "268435456"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>hive-site.xml</strong> for this role only.",
"display_name": "WebHCat Server Advanced Configuration Snippet (Safety Valve) for hive-site.xml",
"name": "hive_webhcat_hive_config_safety_valve",
"value": null
},
{
"desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Hard Limit",
"name": "rm_memory_hard_limit",
"value": "-1"
},
{
"desc": "The period to review when computing unexpected exits.",
"display_name": "Unexpected Exits Monitoring Period",
"name": "unexpected_exits_window",
"value": "5"
},
{
"desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.",
"display_name": "File Descriptor Monitoring Thresholds",
"name": "webhcat_fd_thresholds",
"value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>log4j.properties</strong> for this role only.",
"display_name": "WebHCat Server Logging Advanced Configuration Snippet (Safety Valve)",
"name": "log4j_safety_valve",
"value": null
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.",
"display_name": "Log Directory Free Space Monitoring Absolute Thresholds",
"name": "log_directory_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "For advanced use only, key-value pairs (one on each line) to be inserted into a role's environment. Applies to configurations of this role except client configuration.",
"display_name": "WebHCat Server Environment Advanced Configuration Snippet (Safety Valve)",
"name": "hive_webhcat_env_safety_valve",
"value": null
},
{
"desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Java Configuration Options for WebHCat Server",
"name": "hive_webhcat_java_opts",
"value": "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:-CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+CMSParallelRemarkEnabled"
},
{
"desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.",
"display_name": "Maximum Process File Descriptors",
"name": "rlimit_fds",
"value": null
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "The maximum size, in megabytes, per log file for WebHCat Server logs. Typically used by log4j.",
"display_name": "WebHCat Server Max Log Size",
"name": "max_log_size",
"value": "200"
},
{
"desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.",
"display_name": "Kill When Out of Memory",
"name": "oom_sigkill_enabled",
"value": "true"
},
{
"desc": "Enables the health test that the WebHCat Server's process state is consistent with the role configuration",
"display_name": "WebHCat Server Process Health Test",
"name": "webhcat_scm_health_enabled",
"value": "true"
},
{
"desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.",
"display_name": "Dump Heap When Out of Memory",
"name": "oom_heap_dump_enabled",
"value": "false"
},
{
"desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.",
"display_name": "Unexpected Exits Thresholds",
"name": "unexpected_exits_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>webhcat-site.xml</strong> for this role only.",
"display_name": "WebHCat Server Advanced Configuration Snippet (Safety Valve) for webhcat-site.xml",
"name": "hive_webhcat_config_safety_valve",
"value": null
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Health Alerts for this Role",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "When computing the overall WebHCat Server health, consider the host's health.",
"display_name": "WebHCat Server Host Health Test",
"name": "webhcat_host_health_enabled",
"value": "true"
},
{
"desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup I/O Weight",
"name": "rm_io_weight",
"value": "500"
},
{
"desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup CPU Shares",
"name": "rm_cpu_shares",
"value": "1024"
},
{
"desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.",
"display_name": "Heap Dump Directory",
"name": "oom_heap_dump_dir",
"value": "/tmp"
},
{
"desc": "The minimum log level for WebHCat Server logs",
"display_name": "WebHCat Server Logging Threshold",
"name": "log_threshold",
"value": "INFO"
},
{
"desc": "Directory where WebHCat Server will place its log files.",
"display_name": "WebHCat Server Log Directory",
"name": "hcatalog_log_dir",
"value": "/var/log/hcatalog"
}
]

View File: sahara/plugins/cdh/utils.py

@@ -46,3 +46,7 @@ def get_secondarynamenode(cluster):
def get_historyserver(cluster):
return u.get_instance(cluster, 'JOBHISTORY')
def get_hive_metastore(cluster):
return u.get_instance(cluster, 'HIVEMETASTORE')

View File: sahara/plugins/cdh/validation.py

@@ -83,6 +83,26 @@ def validate_cluster_creating(cluster):
raise ex.RequiredServiceMissingException(
'JOBHISTORY', required_by='OOZIE_SERVER')
hms_count = _get_inst_count(cluster, 'HIVEMETASTORE')
hvs_count = _get_inst_count(cluster, 'HIVESERVER2')
whc_count = _get_inst_count(cluster, 'WEBHCAT')
if hms_count and rm_count < 1:
raise ex.RequiredServiceMissingException(
'RESOURCEMANAGER', required_by='HIVEMETASTORE')
if hms_count and not hvs_count:
raise ex.RequiredServiceMissingException(
'HIVESERVER2', required_by='HIVEMETASTORE')
if hvs_count and not hms_count:
raise ex.RequiredServiceMissingException(
'HIVEMETASTORE', required_by='HIVESERVER2')
if whc_count and not hms_count:
raise ex.RequiredServiceMissingException(
'HIVEMETASTORE', required_by='WEBHCAT')
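_get_inst_count is defined elsewhere in validation.py and is not part of this diff; presumably it sums node group sizes for every group that runs the given process, along these lines (u being the sahara.plugins.utils module this file already imports):

def _get_inst_count(cluster, process):
    # Count all instances across node groups that include the process.
    return sum(ng.count for ng in u.get_node_groups(cluster, process))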
def validate_additional_ng_scaling(cluster, additional):
rm = cu.get_resourcemanager(cluster)