Add HBase support to CDH plugin

Change-Id: I3fd9b4663fcd37051289068baedbbfbd34447bac
luhuichun 2014-11-17 09:39:10 +08:00
parent 55e8fbc8d3
commit 4533630c64
10 changed files with 1406 additions and 3 deletions

View File

@@ -38,6 +38,7 @@ HIVE_SERVICE_NAME = 'hive01'
HUE_SERVICE_NAME = 'hue01'
SPARK_SERVICE_NAME = 'spark_on_yarn01'
ZOOKEEPER_SERVICE_NAME = 'zookeeper01'
HBASE_SERVICE_NAME = 'hbase01'
def have_cm_api_libs():
@@ -107,6 +108,8 @@ def get_service(process, cluster=None, instance=None):
return cm_cluster.get_service(SPARK_SERVICE_NAME)
elif process in ['SERVER']:
return cm_cluster.get_service(ZOOKEEPER_SERVICE_NAME)
elif process in ['MASTER', 'REGIONSERVER']:
return cm_cluster.get_service(HBASE_SERVICE_NAME)
else:
raise ValueError(
_("Process %(process)s is not supported by CDH plugin") %
@@ -163,6 +166,8 @@ def get_role_name(instance, service):
'WEBHCAT': 'WHC',
'SPARK_YARN_HISTORY_SERVER': 'SHS',
'SERVER': 'S',
'MASTER': 'M',
'REGIONSERVER': 'RS'
}
return '%s_%s' % (shortcuts.get(service, service),
instance.hostname().replace('-', '_'))
@@ -225,3 +230,8 @@ def create_hive_metastore_db(hive_service):
def create_hive_dirs(hive_service):
yield hive_service.create_hive_userdir()
yield hive_service.create_hive_warehouse()
@cloudera_cmd
def create_hbase_root(hbase_service):
yield hbase_service.create_hbase_root()
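
The new create_hbase_root helper only yields a Cloudera Manager command; waiting on it is left to the existing cloudera_cmd decorator, which is outside this diff. A minimal sketch of that generator-decorator pattern, assuming each yielded object is a CM ApiCommand whose blocking wait() returns a result carrying success and resultMessage:

import functools

def cloudera_cmd(f):
    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        # Each yielded value is assumed to be a CM ApiCommand, e.g. the one
        # returned by hbase_service.create_hbase_root() above.
        for cmd in f(*args, **kwargs):
            result = cmd.wait()  # block until Cloudera Manager finishes
            if not result.success:
                raise RuntimeError(result.resultMessage)
    return wrapper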

View File

@@ -109,6 +109,9 @@ spark_service_confs = _load_json(path_to_config + 'spark-service.json')
spark_role_confs = _load_json(path_to_config + 'spark-history.json')
zookeeper_server_confs = _load_json(path_to_config + 'zookeeper-server.json')
zookeeper_service_confs = _load_json(path_to_config + 'zookeeper-service.json')
hbase_confs = _load_json(path_to_config + 'hbase-service.json')
master_confs = _load_json(path_to_config + 'hbase-master.json')
regionserver_confs = _load_json(path_to_config + 'hbase-regionserver.json')
priority_one_confs = _load_json(path_to_config + 'priority-one-confs.json')
@@ -154,6 +157,9 @@ def _get_ng_plugin_configs():
cfg += _init_configs(spark_role_confs, 'SPARK_ON_YARN', 'node')
cfg += _init_configs(zookeeper_service_confs, 'ZOOKEEPER', 'cluster')
cfg += _init_configs(zookeeper_server_confs, 'ZOOKEEPER', 'node')
cfg += _init_configs(hbase_confs, 'HBASE', 'cluster')
cfg += _init_configs(master_confs, 'MASTER', 'node')
cfg += _init_configs(regionserver_confs, 'REGIONSERVER', 'node')
return cfg
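
The three hbase-*.json descriptors feed the same _load_json/_init_configs path as the other services. Neither helper appears in this hunk; a rough sketch of what helpers of this shape typically do, with the plugin's Config type reduced to a plain dict for illustration:

import json

def _load_json(path):
    # Parse one descriptor file, e.g. hbase-service.json shown later in
    # this commit.
    with open(path) as f:
        return json.load(f)

def _init_configs(confs, applicable_target, scope):
    # Map each {"name", "desc", "display_name", "value"} entry onto a
    # plugin config; the real code presumably builds provisioning Config
    # objects rather than dicts.
    return [{'name': c['name'],
             'description': c.get('desc'),
             'default_value': c.get('value'),
             'applicable_target': applicable_target,
             'scope': scope}
            for c in confs]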

View File

@@ -43,6 +43,7 @@ HIVE_SERVICE_TYPE = 'HIVE'
HUE_SERVICE_TYPE = 'HUE'
SPARK_SERVICE_TYPE = 'SPARK_ON_YARN'
ZOOKEEPER_SERVICE_TYPE = 'ZOOKEEPER'
HBASE_SERVICE_TYPE = 'HBASE'
PATH_TO_CORE_SITE_XML = '/etc/hadoop/conf/core-site.xml'
HADOOP_LIB_DIR = '/usr/lib/hadoop-mapreduce'
@@ -67,7 +68,8 @@ PACKAGES = [
'oracle-j2sdk1.7',
'spark-history-server',
'unzip',
'zookeeper'
'zookeeper',
'hbase'
]
LOG = logging.getLogger(__name__)
@@ -105,6 +107,10 @@ def _get_configs(service, cluster=None, node_group=None):
},
'SPARK_ON_YARN': {
'yarn_service': cu.YARN_SERVICE_NAME
},
'HBASE': {
'hdfs_service': cu.HDFS_SERVICE_NAME,
'zookeeper_service': cu.ZOOKEEPER_SERVICE_NAME
}
}
@@ -359,6 +365,8 @@ def _create_services(cluster):
cm_cluster.create_service(cu.HUE_SERVICE_NAME, HUE_SERVICE_TYPE)
if pu.get_spark_historyserver(cluster):
cm_cluster.create_service(cu.SPARK_SERVICE_NAME, SPARK_SERVICE_TYPE)
if pu.get_hbase_master(cluster):
cm_cluster.create_service(cu.HBASE_SERVICE_NAME, HBASE_SERVICE_TYPE)
def _configure_services(cluster):
@@ -389,6 +397,9 @@ def _configure_services(cluster):
if pu.get_spark_historyserver(cluster):
spark = cm_cluster.get_service(cu.SPARK_SERVICE_NAME)
spark.update_config(_get_configs(SPARK_SERVICE_TYPE, cluster=cluster))
if pu.get_hbase_master(cluster):
hbase = cm_cluster.get_service(cu.HBASE_SERVICE_NAME)
hbase.update_config(_get_configs(HBASE_SERVICE_TYPE, cluster=cluster))
def _configure_instances(instances):
@@ -524,6 +535,11 @@ def start_cluster(cluster):
spark = cm_cluster.get_service(cu.SPARK_SERVICE_NAME)
cu.start_service(spark)
if pu.get_hbase_master(cluster):
hbase = cm_cluster.get_service(cu.HBASE_SERVICE_NAME)
cu.create_hbase_root(hbase)
cu.start_service(hbase)
def get_open_ports(node_group):
ports = [9000] # for CM agent
@@ -542,7 +558,9 @@ def get_open_ports(node_group):
'HUE_SERVER': [8888],
'OOZIE_SERVER': [11000, 11001],
'SPARK_YARN_HISTORY_SERVER': [18088],
'SERVER': [2181, 3181, 4181, 9010]
'SERVER': [2181, 3181, 4181, 9010],
'MASTER': [60000],
'REGIONSERVER': [60020]
}
for process in node_group.node_processes:
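
The hunk cuts off inside get_open_ports. A plausible completion of that loop, written as a standalone sketch with the ports dict above passed in explicitly:

def collect_ports(node_group, ports_map):
    # Sketch only: gather the per-process ports for every process the node
    # group runs, e.g. 'MASTER' -> [60000], 'REGIONSERVER' -> [60020].
    ports = [9000]  # CM agent, as in the diff above
    for process in node_group.node_processes:
        ports.extend(ports_map.get(process, []))
    return ports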

View File

@@ -56,7 +56,10 @@ class CDHPluginProvider(p.ProvisioningPluginBase):
"WEBHCAT": ['WEBHCAT'],
"HUE": ['HUE_SERVER'],
"SPARK_ON_YARN": ['SPARK_YARN_HISTORY_SERVER'],
"ZOOKEEPER": ['SERVER']
"ZOOKEEPER": ['SERVER'],
"HBASE": [],
"MASTER": ['MASTER'],
"REGIONSERVER": ['REGIONSERVER']
}
def get_configs(self, hadoop_version):

View File

@@ -32,6 +32,7 @@ hive_service_name = 'hive01'
hue_service_name = 'hue01'
spark_service_name = 'spark_on_yarn01'
zookeeper_service_name = 'zookeeper01'
hbase_service_name = 'hbase01'
def get_cm_api():
@@ -102,6 +103,8 @@ def main():
zookeeper = cluster.get_service(zookeeper_service_name)
process_service(zookeeper, 'zookeeper')
hbase = cluster.get_service(hbase_service_name)
process_service(hbase, 'hbase')
if __name__ == '__main__':
main()
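
This developer script is what produces descriptor files like the two HBase JSON documents below. process_service itself is not shown in the diff; a sketch under the assumption that it pulls the full config view from the cm_api client (where view='full' also returns defaults and descriptions) and serializes it in the same {desc, display_name, name, value} shape:

import json

def process_service(service, service_name):
    # Assumed behaviour: service.get_config() returns a (service_configs,
    # role_type_configs) tuple of name -> ApiConfig mappings.
    configs = service.get_config(view='full')[0]
    entries = [{'desc': cfg.description,
                'display_name': cfg.displayName,
                'name': name,
                'value': cfg.value if cfg.value is not None else cfg.default}
               for name, cfg in configs.items()]
    with open('%s-service.json' % service_name, 'w') as f:
        json.dump(entries, f, indent=2)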

View File

@@ -0,0 +1,326 @@
[
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.",
"display_name": "Log Directory Free Space Monitoring Percentage Thresholds",
"name": "log_directory_free_space_percentage_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"never\"}"
},
{
"desc": "<p>This file contains the rules which govern how log messages are turned into events by the custom log4j appender that this role loads. It is in JSON format, and is composed of a list of rules. Every log message is evaluated against each of these rules in turn to decide whether or not to send an event for that message.</p><p>Each rule has some or all of the following fields:</p><ul><li><span class='code'>alert</span> - whether or not events generated from this rule should be promoted to alerts. A value of \"true\" will cause alerts to be generated. If not specified, the default is \"false\".</li><li><span class='code'>rate</span> <strong>(mandatory)</strong> - the maximum number of log messages matching this rule that may be sent as events every minute. If more than <tt>rate</tt> matching log messages are received in a single minute, the extra messages are ignored. If rate is less than 0, the number of messages per minute is unlimited.</li><li><span class='code'>periodminutes</span> - the number of minutes during which the publisher will only publish <tt>rate</tt> events or fewer. If not specified, the default is <strong>one minute</strong></li><li><span class='code'>threshold</span> - apply this rule only to messages with this log4j severity level or above. An example is \"WARN\" for warning level messages or higher.</li><li><span class='code'>content</span> - match only those messages whose contents match this regular expression.</li><li><span class='code'>exceptiontype</span> - match only those messages which are part of an exception message. The exception type must match this regular expression.</li></ul><br/><p>Example:<span class='code'>{\"alert\": false, \"rate\": 10, \"exceptiontype\": \"java.lang.StringIndexOutOfBoundsException\"}</span></p><p>This rule will send events to Cloudera Manager for every <span class='code'>StringIndexOutOfBoundsException</span>, up to a maximum of 10 every minute.</p>",
"display_name": "Rules to Extract Events from Log Files",
"name": "log_event_whitelist",
"value": "{\n \"version\": \"0\",\n \"rules\": [\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"FATAL\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.IOException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketClosedException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.EOFException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.nio.channels.CancelledKeyException\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Instead, use .*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Use .* instead\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"IPC Server handler.*ClosedChannelException\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"IPC Server Responder, call.*output error\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"Daughter regiondir does not exist: .*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"File.*might still be open.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"File.*might still be open.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"Moving table .+ state to enabled but was already enabled\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"Received OPENED for region.*but region was in the state.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 2, \"exceptiontype\": \".*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Unknown job [^ ]+ being deleted.*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Error executing shell command .+ No such process.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\".*attempt to override final parameter.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"[^ ]+ is a deprecated filesystem name. Use.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\"}\n ]\n}\n"
},
{
"desc": "The amount of time allowed after this role is started that failures of health checks that rely on communication with this role will be tolerated.",
"display_name": "Health Check Startup Tolerance",
"name": "master_startup_tolerance",
"value": "5"
},
{
"desc": "Whether or not periodic stacks collection is enabled.",
"display_name": "Stacks Collection Enabled",
"name": "stacks_collection_enabled",
"value": "false"
},
{
"desc": "Number of pooled threads to handle region closing in the master.",
"display_name": "Region Closing Threads",
"name": "hbase_master_executor_closeregion_threads",
"value": "5"
},
{
"desc": "When computing the overall Master health, consider the host's health.",
"display_name": "Master Host Health Test",
"name": "master_host_health_enabled",
"value": "true"
},
{
"desc": "Number of pooled threads to handle the recovery of the region servers in the master.",
"display_name": "RegionServer Recovery Threads",
"name": "hbase_master_executor_serverops_threads",
"value": "5"
},
{
"desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup I/O Weight",
"name": "rm_io_weight",
"value": "500"
},
{
"desc": "Directory where Master will place its log files.",
"display_name": "Master Log Directory",
"name": "hbase_master_log_dir",
"value": "/var/log/hbase"
},
{
"desc": "The health test thresholds on the duration of the metrics request to the web server.",
"display_name": "Web Metric Collection Duration",
"name": "master_web_metric_collection_thresholds",
"value": "{\"critical\":\"never\",\"warning\":\"10000.0\"}"
},
{
"desc": "Time period in seconds to reset long-running metrics (e.g. compactions). This is an HBase specific configuration.",
"display_name": "Extended Period",
"name": "hbase_metrics_extended_period",
"value": "3600"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>log4j.properties</strong> for this role only.",
"display_name": "Master Logging Advanced Configuration Snippet (Safety Valve)",
"name": "log4j_safety_valve",
"value": null
},
{
"desc": "The method that will be used to collect stacks. The jstack option involves periodically running the jstack command against the role's daemon process. The servlet method is available for those roles that expose an HTTP server endpoint exposing the current stacks traces of all threads. When the servlet method is selected that HTTP endpoint is periodically scraped.",
"display_name": "Stacks Collection Method",
"name": "stacks_collection_method",
"value": "jstack"
},
{
"desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.",
"display_name": "File Descriptor Monitoring Thresholds",
"name": "master_fd_thresholds",
"value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}"
},
{
"desc": "The port that the HBase Master binds to.",
"display_name": "HBase Master Port",
"name": "hbase_master_port",
"value": "60000"
},
{
"desc": "Enables the health test that the Cloudera Manager Agent can successfully contact and gather metrics from the web server.",
"display_name": "Web Metric Collection",
"name": "master_web_metric_collection_enabled",
"value": "true"
},
{
"desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.",
"display_name": "Automatically Restart Process",
"name": "process_auto_restart",
"value": "false"
},
{
"desc": "List of org.apache.hadoop.hbase.coprocessor.MasterObserver coprocessors that are loaded by default on the active HMaster process. For any implemented coprocessor methods, the listed classes will be called in order. After implementing your own MasterObserver, just put it in HBase's classpath and add the fully qualified class name here.",
"display_name": "HBase Coprocessor Master Classes",
"name": "hbase_coprocessor_master_classes",
"value": ""
},
{
"desc": "<p>The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><code>triggerName</code> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific role. </li><li><code>triggerExpression</code> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. </li><li><code>streamThreshold</code> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned causes the condition to fire. </li><li><code>enabled</code> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</li></ul></p><p>For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:bad\",\n \"streamThreshold\": 0, \"enabled\": \"true\"}]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Role Triggers",
"name": "role_triggers",
"value": "[]"
},
{
"desc": "Maximum size in bytes for the Java Process heap memory. Passed to Java -Xmx.",
"display_name": "Java Heap Size of HBase Master in Bytes",
"name": "hbase_master_java_heapsize",
"value": "1073741824"
},
{
"desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Soft Limit",
"name": "rm_memory_soft_limit",
"value": "-1"
},
{
"desc": "The health test thresholds for the weighted average time spent in Java garbage collection. Specified as a percentage of elapsed wall clock time.",
"display_name": "Garbage Collection Duration Thresholds",
"name": "master_gc_duration_thresholds",
"value": "{\"critical\":\"60.0\",\"warning\":\"30.0\"}"
},
{
"desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
"display_name": "Cgroup Memory Hard Limit",
"name": "rm_memory_hard_limit",
"value": "-1"
},
{
"desc": "When true, HBase Master will bind to 0.0.0.0. Only available with CDH 4.3 and later.",
"display_name": "HBase Master Bind to Wildcard Address",
"name": "hbase_master_bind_to_wildcard_address",
"value": "true"
},
{
"desc": "Enables the health test that a client can connect to the HBase Master",
"display_name": "HBase Master Canary Health Test",
"name": "master_canary_health_enabled",
"value": "true"
},
{
"desc": "The period to review when computing unexpected exits.",
"display_name": "Unexpected Exits Monitoring Period",
"name": "unexpected_exits_window",
"value": "5"
},
{
"desc": "Advanced Configuration Snippet (Safety Valve) for Hadoop Metrics2. Properties will be inserted into <strong>hadoop-metrics2.properties</strong>.",
"display_name": "Hadoop Metrics2 Advanced Configuration Snippet (Safety Valve)",
"name": "hadoop_metrics2_safety_valve",
"value": null
},
{
"desc": "The amount of stacks data that will be retained. After the retention limit is hit, the oldest data will be deleted.",
"display_name": "Stacks Collection Data Retention",
"name": "stacks_collection_data_retention",
"value": "104857600"
},
{
"desc": "The port for the HBase Master web UI. Set to -1 to disable the HBase Master web UI.",
"display_name": "HBase Master Web UI Port",
"name": "hbase_master_info_port",
"value": "60010"
},
{
"desc": "The period to review when computing the moving average of garbage collection time.",
"display_name": "Garbage Collection Duration Monitoring Period",
"name": "master_gc_duration_window",
"value": "5"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>hbase-site.xml</strong> for this role only.",
"display_name": "Master Advanced Configuration Snippet (Safety Valve) for hbase-site.xml",
"name": "hbase_master_config_safety_valve",
"value": null
},
{
"desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.",
"display_name": "Log Directory Free Space Monitoring Absolute Thresholds",
"name": "log_directory_free_space_absolute_thresholds",
"value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
},
{
"desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.",
"display_name": "Maximum Process File Descriptors",
"name": "rlimit_fds",
"value": null
},
{
"desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
"display_name": "Java Configuration Options for HBase Master",
"name": "hbase_master_java_opts",
"value": "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:-CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+CMSParallelRemarkEnabled"
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Health Alerts for this Role",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "The maximum size, in megabytes, per log file for Master logs. Typically used by log4j.",
"display_name": "Master Max Log Size",
"name": "max_log_size",
"value": "200"
},
{
"desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.",
"display_name": "Kill When Out of Memory",
"name": "oom_sigkill_enabled",
"value": "true"
},
{
"desc": "Maximum time an HLog remains in the .oldlogdir directory until an HBase Master thread deletes it.",
"display_name": "Maximum Time to Keep HLogs",
"name": "hbase_master_logcleaner_ttl",
"value": "60000"
},
{
"desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.",
"display_name": "Dump Heap When Out of Memory",
"name": "oom_heap_dump_enabled",
"value": "false"
},
{
"desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.",
"display_name": "Unexpected Exits Thresholds",
"name": "unexpected_exits_thresholds",
"value": "{\"critical\":\"any\",\"warning\":\"never\"}"
},
{
"desc": "Number of RPC Server instances spun up on HBase Master.",
"display_name": "HBase Master Handler Count",
"name": "hbase_master_handler_count",
"value": "25"
},
{
"desc": "The address for the HBase Master web UI",
"display_name": "HBase Master Web UI Address",
"name": "hbase_master_info_bindAddress",
"value": null
},
{
"desc": "A comma-separated list of LogCleanerDelegate(s) that are used in LogsCleaner. WAL/HLog cleaner(s) are called in order, so put the log cleaner that prunes the most log files in the front. To implement your own LogCleanerDelegate, add it to HBase's classpath and add the fully-qualified class name here. You should always add the above default log cleaners in the list, unless you have a special reason not to.",
"display_name": "HBase Master Log Cleaner Plugins",
"name": "hbase_master_logcleaner_plugins",
"value": null
},
{
"desc": "The directory in which stacks logs will be placed. If not set, stacks will be logged into a <span class='code'>stacks</span>subdirectory of the role's log directory.",
"display_name": "Stacks Collection Directory",
"name": "stacks_collection_directory",
"value": null
},
{
"desc": "The maximum number of rolled log files to keep for Master logs. Typically used by log4j.",
"display_name": "Master Maximum Log File Backups",
"name": "max_log_backup_index",
"value": "10"
},
{
"desc": "The host name or IP address of the DNS name server which an HBase Master should use to determine the host name used for communication and display purposes.",
"display_name": "HBase Master DNS Name Server",
"name": "hbase_master_dns_nameserver",
"value": null
},
{
"desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.",
"display_name": "Cgroup CPU Shares",
"name": "rm_cpu_shares",
"value": "1024"
},
{
"desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.",
"display_name": "Heap Dump Directory",
"name": "oom_heap_dump_dir",
"value": "/tmp"
},
{
"desc": "Number of pooled threads to handle region opening in the master.",
"display_name": "Region Opening Threads",
"name": "hbase_master_executor_openregion_threads",
"value": "5"
},
{
"desc": "The frequency with which stacks will be collected.",
"display_name": "Stacks Collection Frequency",
"name": "stacks_collection_frequency",
"value": "5.0"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "The name of the DNS network interface from which an HBase Master should report its IP address.",
"display_name": "HBase Master DNS Network Interface",
"name": "hbase_master_dns_interface",
"value": null
},
{
"desc": "The minimum log level for Master logs",
"display_name": "Master Logging Threshold",
"name": "log_threshold",
"value": "INFO"
},
{
"desc": "Enables the health test that the Master's process state is consistent with the role configuration",
"display_name": "Master Process Health Test",
"name": "master_scm_health_enabled",
"value": "true"
}
]

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,476 @@
[
{
"desc": "Comma-delimited list of hosts where you want to allow the HBase user to impersonate other users. The default '*' allows all hosts. To disable entirely, use a string that does not correspond to a host name, such as '_no_host'. <b>Note:</b> This property is used only if HBase REST/Thrift Server Authentication is enabled.",
"display_name": "HBase Proxy User Hosts",
"name": "hbase_proxy_user_hosts_list",
"value": "*"
},
{
"desc": "The user that this service's processes should run as.",
"display_name": "System User",
"name": "process_username",
"value": "hbase"
},
{
"desc": "The frequency in which the log4j event publication appender will retry sending undelivered log events to the Event server, in seconds",
"display_name": "Log Event Retry Frequency",
"name": "log_event_retry_frequency",
"value": "30"
},
{
"desc": "A general client pause time value. Used mostly as a time period to wait before retrying operations such as a failed get or region lookup.",
"display_name": "HBase Client Pause",
"name": "hbase_client_pause",
"value": "100"
},
{
"desc": "Comma-delimited list of groups that you want to allow the HBase user to impersonate. The default '*' allows all groups. To disable entirely, use a string that does not correspond to a group name, such as '_no_group_'. <b>Note:</b> This property is used only if HBase REST/Thrift Server Authentication is enabled.",
"display_name": "HBase Proxy User Groups",
"name": "hbase_proxy_user_groups_list",
"value": "*"
},
{
"desc": "Number of rows to fetch when calling next on a scanner if it is not served from memory. Higher caching values enable faster scanners but require more memory and some calls of next may take longer when the cache is empty.",
"display_name": "HBase Client Scanner Caching",
"name": "hbase_client_scanner_caching",
"value": "100"
},
{
"desc": "<p>\nConfigures the rules for event tracking and coalescing. This feature is\nused to define equivalency between different audit events. When\nevents match, according to a set of configurable parameters, only one\nentry in the audit list is generated for all the matching events.\n</p>\n\n<p>\nTracking works by keeping a reference to events when they first appear,\nand comparing other incoming events against the \"tracked\" events according\nto the rules defined here.\n</p>\n\n<p>Event trackers are defined in a JSON object like the following:</p>\n\n<pre>\n{\n \"timeToLive\" : [integer],\n \"fields\" : [\n {\n \"type\" : [string],\n \"name\" : [string]\n }\n ]\n}\n</pre>\n\n<p>\nWhere:\n</p>\n\n<ul>\n <li>timeToLive: maximum amount of time an event will be tracked, in\n milliseconds. Must be provided. This defines how long, since it's\n first seen, an event will be tracked. A value of 0 disables tracking.</li>\n\n <li>fields: list of fields to compare when matching events against\n tracked events.</li>\n</ul>\n\n<p>\nEach field has an evaluator type associated with it. The evaluator defines\nhow the field data is to be compared. The following evaluators are\navailable:\n</p>\n\n<ul>\n <li>value: uses the field value for comparison.</li>\n\n <li>username: treats the field value as a user name, and ignores any\n host-specific data. This is useful for environment using Kerberos,\n so that only the principal name and realm are compared.</li>\n</ul>\n\n<p>\nThe following is the list of fields that can used to compare HBase events:\n</p>\n<ul>\n <li>allowed: whether the operation was allowed or denied.</li>\n <li>username: the user performing the action.</li>\n <li>scope: the scopeof the operation.</li>\n <li>family: the column family afftected by the operation.</li>\n <li>qualifier: the qualifier the operation.</li>\n <li>action: the action being performed.</li>\n</ul>\n",
"display_name": "Event Tracker",
"name": "navigator_event_tracker",
"value": "{\n \"comment\" : [\n \"Default event tracker for HBase services.\",\n \"Defines equality by comparing username, action, table name, family \",\n \"and qualifier of the events.\"\n ],\n \"timeToLive\" : 60000,\n \"fields\" : [\n { \"type\": \"value\", \"name\" : \"tableName\" },\n { \"type\": \"value\", \"name\" : \"family\" },\n { \"type\": \"value\", \"name\" : \"qualifier\" },\n { \"type\": \"value\", \"name\" : \"operation\" },\n { \"type\": \"username\", \"name\" : \"username\" }\n ]\n}\n"
},
{
"desc": "Action to take when the audit event queue is full. Drop the event or shutdown the affected process.",
"display_name": "Queue Policy",
"name": "navigator_audit_queue_policy",
"value": "DROP"
},
{
"desc": "Maximum number of errors that the HBase Hbck poller will retain through a given run",
"display_name": "HBase Hbck Poller Maximum Error Count",
"name": "hbase_hbck_poller_max_error_count",
"value": "10000"
},
{
"desc": "Timeout for all HBase RPCs in milliseconds.",
"display_name": "RPC Timeout",
"name": "hbase_rpc_timeout",
"value": "60000"
},
{
"desc": "Timeout (in ms) for the distributed log splitting manager to receive response from a worker.",
"display_name": "SplitLog Manager Timeout",
"name": "hbase_service_splitlog_manager_timeout",
"value": "120000"
},
{
"desc": "<p>Event filters are defined in a JSON object like the following:</p>\n\n<pre>\n{\n \"defaultAction\" : (\"accept\", \"discard\"),\n \"rules\" : [\n {\n \"action\" : (\"accept\", \"discard\"),\n \"fields\" : [\n {\n \"name\" : \"fieldName\",\n \"match\" : \"regex\"\n }\n ]\n }\n ]\n}\n</pre>\n\n<p>\nA filter has a default action and a list of rules, in order of precedence.\nEach rule defines an action, and a list of fields to match against the\naudit event.\n</p>\n\n<p>\nA rule is \"accepted\" if all the listed field entries match the audit\nevent. At that point, the action declared by the rule is taken.\n</p>\n\n<p>\nIf no rules match the event, the default action is taken. Actions\ndefault to \"accept\" if not defined in the JSON object.\n</p>\n\n<p>\nThe following is the list of fields that can be filtered for HBase events:\n</p>\n<ul>\n <li>allowed: whether the operation was allowed or denied.</li>\n <li>username: the user performing the action.</li>\n <li>tableName: the table affected by the operation.</li>\n <li>family: the column family affected by the operation.</li>\n <li>qualifier: the qualifier the operation.</li>\n <li>action: the action being performed.</li>\n</ul>\n",
"display_name": "Event Filter",
"name": "navigator_audit_event_filter",
"value": "{\n \"comment\" : [\n \"Default filter for HBase services.\",\n \"Discards events that affect the internal -ROOT-, .META. and _acl_ tables.\"\n ],\n \"defaultAction\" : \"accept\",\n \"rules\" : [\n {\n \"action\" : \"discard\",\n \"fields\" : [\n { \"name\" : \"tableName\", \"match\" : \"(?:-ROOT-|.META.|_acl_|hbase:meta|hbase:acl)\" }\n ]\n }\n ]\n}\n"
},
{
"desc": "The tolerance window that will be used in HBase service tests that depend on detection of the active HBase Master.",
"display_name": "HBase Active Master Detection Window",
"name": "hbase_active_master_detecton_window",
"value": "3"
},
{
"desc": "Allow indexing of tables in HBase by Lily HBase Indexer. <strong>Note:</strong> Replication must be enabled for indexing to work.",
"display_name": "Enable Indexing",
"name": "hbase_enable_indexing",
"value": "false"
},
{
"desc": "Name of the HDFS service that this HBase service instance depends on",
"display_name": "HDFS Service",
"name": "hdfs_service",
"value": null
},
{
"desc": "Period of time, in milliseconds, to pause between connection retries to ZooKeeper. Used together with ${zookeeper.retries} in an exponential backoff fashion when making queries to ZooKeeper.",
"display_name": "ZooKeeper Connection Retry Pause Duration",
"name": "zookeeper_pause",
"value": null
},
{
"desc": "For advanced use only, a string to be inserted into <strong>hbase-site.xml</strong>. Applies to configurations of all roles in this service except client configuration.",
"display_name": "HBase Service Advanced Configuration Snippet (Safety Valve) for hbase-site.xml",
"name": "hbase_service_config_safety_valve",
"value": null
},
{
"desc": "Enable SSL encryption for HBase web UIs",
"display_name": "Web UI SSL Encryption Enabled",
"name": "hbase_hadoop_ssl_enabled",
"value": "false"
},
{
"desc": "The root znode for HBase in ZooKeeper. All of HBase's ZooKeeper files that are configured with a relative path will go under this node. By default, all of HBase's ZooKeeper file paths are configured with a relative path, so they will all go under this directory unless changed.",
"display_name": "ZooKeeper Znode Parent",
"name": "zookeeper_znode_parent",
"value": "/hbase"
},
{
"desc": "AWS access key Id required to access S3 to store remote snapshots.",
"display_name": "AWS S3 access key Id for remote snapshots",
"name": "hbase_snapshot_s3_access_key_id",
"value": null
},
{
"desc": "When computing the overall HBase cluster health, consider the active HBase Master's health.",
"display_name": "Active Master Health Test",
"name": "hbase_master_health_enabled",
"value": "true"
},
{
"desc": "Maximum number of rolled over audit logs to retain. The logs will not be deleted if they contain audit events that have not yet been propagated to Audit Server.",
"display_name": "Number of Audit Logs to Retain",
"name": "navigator_audit_log_max_backup_index",
"value": "10"
},
{
"desc": "Choose the authentication mechanism used by HBase",
"display_name": "HBase Secure Authentication",
"name": "hbase_security_authentication",
"value": "simple"
},
{
"desc": "AWS secret access key required to access S3 to store remote snapshots.",
"display_name": "AWS S3 secret access key for remote snapshots",
"name": "hbase_snapshot_s3_secret_access_key",
"value": null
},
{
"desc": "An alert is published if the HBase Hbck tool runs slowly.",
"display_name": "HBase Hbck Slow Run Alert Enabled",
"name": "hbase_hbck_slow_run_alert_enabled",
"value": "true"
},
{
"desc": "Ratio of Lily HBase Indexers used by each HBase RegionServer while doing replication.",
"display_name": "Replication Source Ratio",
"name": "hbase_replication_source_ratio",
"value": "1.0"
},
{
"desc": "Name of the ZooKeeper service that this HBase service instance depends on",
"display_name": "ZooKeeper Service",
"name": "zookeeper_service",
"value": null
},
{
"desc": "Maximum size of audit log file in MB before it is rolled over.",
"display_name": "Maximum Audit Log File Size",
"name": "navigator_audit_log_max_file_size",
"value": "100"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>core-site.xml</strong>. Applies to configurations of all roles in this service except client configuration.",
"display_name": "HBase Service Advanced Configuration Snippet (Safety Valve) for core-site.xml",
"name": "hbase_core_site_safety_valve",
"value": null
},
{
"desc": "Password for the server keystore file used for encrypted web UIs",
"display_name": "SSL Server Keystore File Password",
"name": "ssl_server_keystore_password",
"value": null
},
{
"desc": "Size of the threadpool used for hedged reads in hdfs clients. If a read from a block is slow, a parallel 'hedged' read will be started against a different block replica. The first one to return with a result is used while the other one is cancelled. This 'hedged' read feature helps rein in the outliers. A value of zero disables the feature.",
"display_name": "HDFS Hedged Read Threadpool Size",
"name": "hbase_server_dfs_client_hedged_read_threadpool_size",
"value": "0"
},
{
"desc": "Configures whether the Hbck poller checks HDFS or not. Checking HBase tables and regions information on HDFS can take a while.",
"display_name": "HBase Hbck Check HDFS",
"name": "hbase_hbck_poller_check_hdfs",
"value": "true"
},
{
"desc": "If this is set to \"kerberos\", HBase REST Server will authenticate its clients. HBase Proxy User Hosts and Groups should be configured to allow specific users to access HBase through REST Server.",
"display_name": "HBase REST Authentication",
"name": "hbase_restserver_security_authentication",
"value": "simple"
},
{
"desc": "Tables to exclude in the HBase Region Health Canary which will scan a row from every region.",
"display_name": "HBase Region Health Canary Exclude Tables",
"name": "hbase_region_health_canary_exclude_tables",
"value": ""
},
{
"desc": "Specifies the combined maximum allowed size of a KeyValue instance. This option configures an upper boundary for a single entry saved in a storage file. This option prevents a region from splitting if the data is too large. Set this option to a fraction of the maximum region size. To disable this check, use a value of zero or less.",
"display_name": "Maximum Size of HBase Client KeyValue",
"name": "hbase_client_keyvalue_maxsize",
"value": "10485760"
},
{
"desc": "Path to the directory where audit logs will be written. The directory will be created if it doesn't exist.",
"display_name": "Audit Log Directory",
"name": "audit_event_log_dir",
"value": "/var/log/hbase/audit"
},
{
"desc": "Enable snapshots. Disabling snapshots requires deletion of all snapshots before restarting the HBase master; the HBase master will not start if snapshots are disabled and snapshots exist.",
"display_name": "Enable Snapshots",
"name": "hbase_snapshot_enabled",
"value": "true"
},
{
"desc": "For advanced use only, key-value pairs (one on each line) to be inserted into a role's environment. Applies to configurations of all roles in this service except client configuration.",
"display_name": "HBase Service Environment Advanced Configuration Snippet (Safety Valve)",
"name": "hbase_service_env_safety_valve",
"value": null
},
{
"desc": "Enables the canary that checks HBase region availability by scanning a row from every region.",
"display_name": "HBase Region Health Canary",
"name": "hbase_region_health_canary_enabled",
"value": "true"
},
{
"desc": "Timeout for graceful shutdown of this HBase service. Once this timeout is reached, any remaining running roles are abruptly shutdown. A value of 0 means no timeout.",
"display_name": "Graceful Shutdown Timeout",
"name": "hbase_graceful_stop_timeout",
"value": "180"
},
{
"desc": "An alert is published if the HBase Hbck tool detects at least this many regions with errors across all tables in this service. If the value is not set, alerts will not be published based on the count of regions with errors.",
"display_name": "HBase Hbck Region Error Count Alert Threshold",
"name": "hbase_hbck_alert_region_error_count_threshold",
"value": null
},
{
"desc": "An alert is published if the HBase Hbck tool finds any errors with matching codes. Possible error codes: UNKNOWN, NO_META_REGION, NULL_ROOT_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META, NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED, MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE, FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS, HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION, ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE",
"display_name": "HBase Hbck Alert Error Codes",
"name": "hbase_hbck_alert_error_codes",
"value": "NO_META_REGION,NULL_ROOT_REGION"
},
{
"desc": "AWS S3 path where remote snapshots should be stored.",
"display_name": "AWS S3 path for remote snapshots",
"name": "hbase_snapshot_s3_path",
"value": null
},
{
"desc": "Maximum number of client retries. Used as a maximum for all operations such as fetching of the root region from the root RegionServer, getting a cell's value, and starting a row update.",
"display_name": "Maximum HBase Client Retries",
"name": "hbase_client_retries_number",
"value": "35"
},
{
"desc": "An alert is published if the HBase Hbck tool detects at least this many errors across all tables in this service. Some errors are not associated with a region, e.g. 'RS_CONNECT_FAILURE'. If the value is not set, alerts will not be published based on the count of errors.",
"display_name": "HBase Hbck Error Count Alert Threshold",
"name": "hbase_hbck_alert_error_count_threshold",
"value": null
},
{
"desc": "Enable collection of audit events from the service's roles.",
"display_name": "Enable Collection",
"name": "navigator_audit_enabled",
"value": "true"
},
{
"desc": "The group that this service's processes should run as.",
"display_name": "System Group",
"name": "process_groupname",
"value": "hbase"
},
{
"desc": "Set to true to use HBase Secure RPC Engine for remote procedure calls (RPC). This is only effective in simple authentication mode. Does not provide authentication for RPC calls, but provides user information in the audit logs. Changing this setting requires a restart of this and all dependent services and redeployment of client configurations, along with a restart of the Service Monitor management role.",
"display_name": "HBase Secure RPC Engine",
"name": "hbase_secure_rpc_engine",
"value": "false"
},
{
"desc": "When set, Cloudera Manager will send alerts when the health of this service reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
"display_name": "Enable Service Level Health Alerts",
"name": "enable_alerts",
"value": "true"
},
{
"desc": "The HDFS directory shared by HBase RegionServers",
"display_name": "HDFS Root Directory",
"name": "hdfs_rootdir",
"value": "/hbase"
},
{
"desc": "Enable HDFS short circuit read. This allows a client co-located with the DataNode to read HDFS file blocks directly. This gives a performance boost to distributed clients that are aware of locality.",
"display_name": "Enable HDFS Short Circuit Read",
"name": "dfs_client_read_shortcircuit",
"value": "true"
},
{
"desc": "For advanced use only, a string to be inserted into <strong>ssl-server.xml</strong>. Applies to configurations of all roles in this service except client configuration.",
"display_name": "HBase Service Advanced Configuration Snippet (Safety Valve) for ssl-server.xml",
"name": "hbase_ssl_server_safety_valve",
"value": null
},
{
"desc": "Password that protects the private key contained in the server keystore used for encrypted web UIs",
"display_name": "SSL Server Keystore Key Password",
"name": "ssl_server_keystore_keypassword",
"value": null
},
{
"desc": "Path to ZooKeeper Node holding root region location. This is written by the HBase Master and read by clients and RegionServers. If a relative path is given, the parent folder will be ${zookeeper.znode.parent}. By default, the root location is stored at /hbase/root-region-server.",
"display_name": "ZooKeeper Znode Rootserver",
"name": "zookeeper_znode_rootserver",
"value": "root-region-server"
},
{
"desc": "When computing the overall HBase cluster health, consider the health of the backup HBase Masters.",
"display_name": "Backup Masters Health Test",
"name": "hbase_backup_masters_health_enabled",
"value": "true"
},
{
"desc": "For advanced use only, a list of configuration properties that will be used by the Service Monitor instead of the current client configuration for the service.",
"display_name": "Service Monitor Client Config Overrides",
"name": "smon_client_config_overrides",
"value": "<property><name>zookeeper.recovery.retry</name><value>0</value></property><property><name>zookeeper.recovery.retry.intervalmill</name><value>3000</value></property><property><name>hbase.zookeeper.recoverable.waittime</name><value>1000</value></property><property><name>zookeeper.session.timeout</name><value>30000</value></property><property><name>hbase.rpc.timeout</name><value>10000</value></property><property><name>hbase.client.retries.number</name><value>1</value></property><property><name>hbase.client.rpc.maxattempts</name><value>1</value></property><property><name>hbase.client.operation.timeout</name><value>10000</value></property>"
},
{
"desc": "<p>The configured triggers for this service. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><code>triggerName</code> <strong>(mandatory)</strong> - the name of the trigger. This value must be unique for the specific service. </li><li><code>triggerExpression</code> <strong>(mandatory)</strong> - a tsquery expression representing the trigger. </li><li><code>streamThreshold</code> <strong>(optional)</strong> - the maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned causes the condition to fire. </li><li><code>enabled</code> <strong> (optional)</strong> - by default set to 'true'. If set to 'false' the trigger will not be evaluated.</li></ul></p><p>For example, here is a JSON formatted trigger that fires if there are more than 10 DataNodes with more than 500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleType = DataNode and last(fd_open) > 500) DO health:bad\",\n \"streamThreshold\": 10, \"enabled\": \"true\"}]</pre></p><p>Consult the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.</p>",
"display_name": "Service Triggers",
"name": "service_triggers",
"value": "[]"
},
{
"desc": "An alert is published if the HBase region health canary runs slowly.",
"display_name": "HBase Region Health Canary Slow Run Alert Enabled",
"name": "hbase_region_health_canary_slow_run_alert_enabled",
"value": "true"
},
{
"desc": "Set to true to cause the hosting server (Master or RegionServer) to abort if a coprocessor throws a Throwable object that is not IOException or a subclass of IOException. Setting it to true might be useful in development environments where one wants to terminate the server as soon as possible to simplify coprocessor failure analysis.",
"display_name": "HBase Coprocessor Abort on Error",
"name": "hbase_coprocessor_abort_on_error",
"value": "false"
},
{
"desc": "When set, each role identifies important log events and forwards them to Cloudera Manager.",
"display_name": "Enable Log Event Capture",
"name": "catch_events",
"value": "true"
},
{
"desc": "Allow HBase tables to be replicated.",
"display_name": "Enable Replication",
"name": "hbase_enable_replication",
"value": "false"
},
{
"desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
"display_name": "Enable Configuration Change Alerts",
"name": "enable_config_alerts",
"value": "false"
},
{
"desc": "Path to the keystore file containing the server certificate and private key used for encrypted web UIs",
"display_name": "SSL Server Keystore File Location",
"name": "ssl_server_keystore_location",
"value": null
},
{
"desc": "Enable HBase authorization",
"display_name": "HBase Secure Authorization",
"name": "hbase_security_authorization",
"value": "false"
},
{
"desc": "Name of the scheduler pool to use for MR jobs created during export/import of remote snapshots in AWS S3.",
"display_name": "Scheduler pool for remote snapshots in AWS S3.",
"name": "hbase_snapshot_s3_scheduler_pool",
"value": null
},
{
"desc": "Period of time, in milliseconds, to pause between searches for work. Used as a sleep interval by service threads such as a META scanner and log roller.",
"display_name": "HBase Server Thread Wake Frequency",
"name": "hbase_server_thread_wakefrequency",
"value": "10000"
},
{
"desc": "An alert is published if the HBase region health canary detects at least this percentage of total regions are unhealthy. This threshold is used if the explicit count is not set via the hbase_canary_alert_unhealthy_region_count_threshold config.",
"display_name": "HBase Canary Unhealthy Region Percentage Alert Threshold",
"name": "hbase_canary_alert_unhealthy_region_percent_threshold",
"value": "0.1"
},
{
"desc": "The health test thresholds of the overall RegionServer health. The check returns \"Concerning\" health if the percentage of \"Healthy\" RegionServers falls below the warning threshold. The check is unhealthy if the total percentage of \"Healthy\" and \"Concerning\" RegionServers falls below the critical threshold.",
"display_name": "Healthy RegionServer Monitoring Thresholds",
"name": "hbase_regionservers_healthy_thresholds",
"value": "{\"critical\":\"90.0\",\"warning\":\"95.0\"}"
},
{
"desc": "For advanced use only, a string to be inserted into the client configuration for <strong>navigator.client.properties</strong>.",
"display_name": "HBASE Client Advanced Configuration Snippet (Safety Valve) for navigator.client.properties",
"name": "navigator_client_config_safety_valve",
"value": null
},
{
"desc": "Duration to wait before starting up a 'hedged' read.",
"display_name": "HDFS Hedged Read Delay Threshold",
"name": "hbase_server_dfs_client_hedged_read_threshold_millis",
"value": "500"
},
{
"desc": "Enables the HBase Hbck Poller so that Hbck reports will be available. Enabling the Hbck poller will increase the amount of memory used by the Service Monitor. Consider increasing the Service Monitor Java heap size by an additional 3KB per region. For example, for a cluster with 10,000 regions, increase the JVM heap size by approximately 30MB.",
"display_name": "HBase Hbck Poller",
"name": "hbase_hbck_poller_enabled",
"value": "false"
},
{
"desc": "List of users or groups, who are allowed full privileges, regardless of stored ACLs, across the cluster. Only used when HBase security is enabled.",
"display_name": "HBase Superusers",
"name": "hbase_superuser",
"value": ""
},
{
"desc": "Maximum number of hlog entries to replicate in one go. If this is large, and a consumer takes a while to process the events, the HBase RPC call will time out.",
"display_name": "Replication Batch Size",
"name": "hbase_replication_source_nb_capacity",
"value": "1000"
},
{
"desc": "An alert is published if the HBase region health canary detects at least this many unhealthy regions. This setting takes precedence over the hbase_canary_alert_unhealthy_region_percent_threshold config.",
"display_name": "HBase Canary Unhealthy Region Count Alert Threshold",
"name": "hbase_canary_alert_unhealthy_region_count_threshold",
"value": null
},
{
"desc": "The number of times to retry connections to ZooKeeper. Used for reading and writing root region location. Used together with ${zookeeper.pause} in an exponential backoff fashion when making queries to ZooKeeper.",
"display_name": "ZooKeeper Connection Retries",
"name": "zookeeper_retries",
"value": null
},
{
"desc": "ZooKeeper session timeout in milliseconds. HBase passes this to the ZooKeeper quorum as the suggested maximum time for a session. See http://hadoop.apache.org/zookeeper/docs/current/zookeeperProgrammers.html#ch_zkSessions The client sends a requested timeout, the server responds with the timeout that it can give the client.",
"display_name": "ZooKeeper Session Timeout",
"name": "zookeeper_session_timeout",
"value": "60000"
},
{
"desc": "For advanced use only, a list of derived configuration properties that will be used by the Service Monitor instead of the default ones.",
"display_name": "Service Monitor Derived Configs Advanced Configuration Snippet (Safety Valve)",
"name": "smon_derived_configs_safety_valve",
"value": null
},
{
"desc": "Write buffer size in bytes. A larger buffer requires more memory on both the client and the server because the server instantiates the passed write buffer to process it but reduces the number of remote procedure calls (RPC). To estimate the amount of server memory used, multiply the value of 'hbase.client.write.buffer' by the value of 'hbase.regionserver.handler.count'.",
"display_name": "HBase Client Write Buffer",
"name": "hbase_client_write_buffer",
"value": "2097152"
},
{
"desc": "The user the management services will impersonate when connecting to HBase. Defaults to 'hbase', a superuser.",
"display_name": "HBase User to Impersonate",
"name": "hbase_user_to_impersonate",
"value": "hbase"
}
]

View File

@@ -62,3 +62,7 @@ def get_spark_historyserver(cluster):
def get_zookeepers(cluster):
return u.get_instances(cluster, 'SERVER')
def get_hbase_master(cluster):
return u.get_instance(cluster, 'MASTER')
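
get_hbase_master uses the singular u.get_instance because a cluster runs one HBase Master, while get_zookeepers above returns a list. A hypothetical plural counterpart for RegionServers, not part of this commit, would follow the same pattern:

def get_hbase_regionservers(cluster):
    # Hypothetical helper for illustration only: RegionServers are plural,
    # so the list-returning variant applies.
    return u.get_instances(cluster, 'REGIONSERVER')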

View File

@@ -116,6 +116,21 @@ def validate_cluster_creating(cluster):
raise ex.RequiredServiceMissingException(
'HIVEMETASTORE', required_by='HUE_SERVER')
hbm_count = _get_inst_count(cluster, 'MASTER')
hbr_count = _get_inst_count(cluster, 'REGIONSERVER')
zk_count = _get_inst_count(cluster, 'SERVER')
if hbm_count >= 1:
if zk_count < 1:
raise ex.RequiredServiceMissingException('ZOOKEEPER',
required_by='HBASE')
if hbr_count < 1:
raise ex.InvalidComponentCountException('REGIONSERVER',
_('at least 1'), hbr_count)
elif hbr_count >= 1:
raise ex.InvalidComponentCountException('MASTER',
_('at least 1'), hbm_count)
def validate_additional_ng_scaling(cluster, additional):
rm = cu.get_resourcemanager(cluster)
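
The rule encoded by the validation hunk above: any MASTER instance requires a ZooKeeper SERVER and at least one REGIONSERVER, and REGIONSERVERs without a MASTER are rejected. The same invariant as a standalone sketch, with sahara's exception types replaced by ValueError:

def check_hbase_topology(hbm_count, hbr_count, zk_count):
    if hbm_count >= 1:
        if zk_count < 1:
            raise ValueError("HBASE requires a ZOOKEEPER service")
        if hbr_count < 1:
            raise ValueError("REGIONSERVER: at least 1 required, got %d"
                             % hbr_count)
    elif hbr_count >= 1:
        raise ValueError("MASTER: at least 1 required, got %d" % hbm_count)

check_hbase_topology(1, 3, 3)  # a valid layout passes silently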