From fdf5c0810eb72d10b5a48673703ac9ac3693b304 Mon Sep 17 00:00:00 2001 From: Ryan Lane Date: Mon, 23 Sep 2013 14:17:17 -0700 Subject: [PATCH] Separate logstash/elasticsearch and verify hash To allow for reuse of elasticsearch I'm splitting it away from logstash. Also, I'm doing a hash check of the elasticsearch wget for better security. Change-Id: Iff42d538cd941abd50b000879ea4a237ea48d40e --- files/elasticsearch.default | 38 ---- files/elasticsearch.mapping.json | 19 -- manifests/elasticsearch.pp | 75 +------ templates/elasticsearch.yml.erb | 372 ------------------------------- 4 files changed, 1 insertion(+), 503 deletions(-) delete mode 100644 files/elasticsearch.default delete mode 100644 files/elasticsearch.mapping.json delete mode 100644 templates/elasticsearch.yml.erb diff --git a/files/elasticsearch.default b/files/elasticsearch.default deleted file mode 100644 index 1d1aac6..0000000 --- a/files/elasticsearch.default +++ /dev/null @@ -1,38 +0,0 @@ -# Run ElasticSearch as this user ID and group ID -#ES_USER=elasticsearch -#ES_GROUP=elasticsearch - -# Heap Size (defaults to 256m min, 1g max) -ES_HEAP_SIZE=16g - -# Heap new generation -#ES_HEAP_NEWSIZE= - -# max direct memory -#ES_DIRECT_SIZE= - -# Maximum number of open files, defaults to 65535. -#MAX_OPEN_FILES=65535 - -# Maximum locked memory size. Set to "unlimited" if you use the -# bootstrap.mlockall option in elasticsearch.yml. You must also set -# ES_HEAP_SIZE. -MAX_LOCKED_MEMORY=unlimited - -# ElasticSearch log directory -#LOG_DIR=/var/log/elasticsearch - -# ElasticSearch data directory -#DATA_DIR=/var/lib/elasticsearch - -# ElasticSearch work directory -#WORK_DIR=/tmp/elasticsearch - -# ElasticSearch configuration directory -#CONF_DIR=/etc/elasticsearch - -# ElasticSearch configuration file (elasticsearch.yml) -#CONF_FILE=/etc/elasticsearch/elasticsearch.yml - -# Additional Java OPTS -#ES_JAVA_OPTS= diff --git a/files/elasticsearch.mapping.json b/files/elasticsearch.mapping.json deleted file mode 100644 index ce5fbf6..0000000 --- a/files/elasticsearch.mapping.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "_default_": { - "_all": { "enabled": false }, - "_source": { "compress": true }, - "dynamic_templates": [ - { - "string_template" : { - "match" : "*", - "mapping": { "type": "string", "index": "not_analyzed" }, - "match_mapping_type" : "string" - } - } - ], - "properties" : { - "@fields": { "type": "object", "dynamic": true, "path": "full" }, - "@message" : { "type" : "string", "index" : "analyzed" } - } - } -} diff --git a/manifests/elasticsearch.pp b/manifests/elasticsearch.pp index eb45622..be958f3 100644 --- a/manifests/elasticsearch.pp +++ b/manifests/elasticsearch.pp @@ -14,50 +14,7 @@ # # Class to install elasticsearch. # -class logstash::elasticsearch ( - discover_nodes = ['localhost'] -) { - # install java runtime - package { 'java7-runtime-headless': - ensure => present, - } - - exec { 'get_elasticsearch_deb': - command => 'wget http://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-0.20.5.deb -O /tmp/elasticsearch-0.20.5.deb', - path => '/bin:/usr/bin', - creates => '/tmp/elasticsearch-0.20.5.deb', - } - - # install elastic search - package { 'elasticsearch': - ensure => latest, - source => '/tmp/elasticsearch-0.20.5.deb', - provider => 'dpkg', - subscribe => Exec['get_elasticsearch_deb'], - require => [ - Package['java7-runtime-headless'], - Exec['get_elasticsearch_deb'], - ] - } - - file { '/etc/elasticsearch/elasticsearch.yml': - ensure => present, - content => template('logstash/elasticsearch.yml.erb'), - replace => true, - owner => 'root', - group => 'root', - mode => '0644', - require => Package['elasticsearch'], - } - - file { '/etc/elasticsearch/templates': - ensure => directory, - owner => 'root', - group => 'root', - mode => '0755', - require => Package['elasticsearch'], - } - +class logstash::elasticsearch { file { '/etc/elasticsearch/templates/logstash_settings.json': ensure => present, source => 'puppet:///modules/logstash/es-logstash-template.json', @@ -67,34 +24,4 @@ class logstash::elasticsearch ( mode => '0644', require => File['/etc/elasticsearch/templates'], } - - file { '/etc/elasticsearch/default-mapping.json': - ensure => present, - source => 'puppet:///modules/logstash/elasticsearch.mapping.json', - replace => true, - owner => 'root', - group => 'root', - mode => '0644', - require => Package['elasticsearch'], - } - - file { '/etc/default/elasticsearch': - ensure => present, - source => 'puppet:///modules/logstash/elasticsearch.default', - replace => true, - owner => 'root', - group => 'root', - mode => '0644', - require => Package['elasticsearch'], - } - - service { 'elasticsearch': - ensure => running, - require => [ - Package['elasticsearch'], - File['/etc/elasticsearch/elasticsearch.yml'], - File['/etc/elasticsearch/default-mapping.json'], - File['/etc/default/elasticsearch'], - ], - } } diff --git a/templates/elasticsearch.yml.erb b/templates/elasticsearch.yml.erb deleted file mode 100644 index ed89730..0000000 --- a/templates/elasticsearch.yml.erb +++ /dev/null @@ -1,372 +0,0 @@ -##################### ElasticSearch Configuration Example ##################### - -# This file contains an overview of various configuration settings, -# targeted at operations staff. Application developers should -# consult the guide at . -# -# The installation procedure is covered at -# . -# -# ElasticSearch comes with reasonable defaults for most settings, -# so you can try it out without bothering with configuration. -# -# Most of the time, these defaults are just fine for running a production -# cluster. If you're fine-tuning your cluster, or wondering about the -# effect of certain configuration option, please _do ask_ on the -# mailing list or IRC channel [http://elasticsearch.org/community]. - -# Any element in the configuration can be replaced with environment variables -# by placing them in ${...} notation. For example: -# -# node.rack: ${RACK_ENV_VAR} - -# See -# for information on supported formats and syntax for the configuration file. - - -################################### Cluster ################################### - -# Cluster name identifies your cluster for auto-discovery. If you're running -# multiple clusters on the same network, make sure you're using unique names. -# -# cluster.name: elasticsearch - - -#################################### Node ##################################### - -# Node names are generated dynamically on startup, so you're relieved -# from configuring them manually. You can tie this node to a specific name: -# -node.name: "<%= scope.lookupvar("::hostname") %>" - -# Every node can be configured to allow or deny being eligible as the master, -# and to allow or deny to store the data. -# -# Allow this node to be eligible as a master node (enabled by default): -# -# node.master: true -# -# Allow this node to store data (enabled by default): -# -# node.data: true - -# You can exploit these settings to design advanced cluster topologies. -# -# 1. You want this node to never become a master node, only to hold data. -# This will be the "workhorse" of your cluster. -# -# node.master: false -# node.data: true -# -# 2. You want this node to only serve as a master: to not store any data and -# to have free resources. This will be the "coordinator" of your cluster. -# -# node.master: true -# node.data: false -# -# 3. You want this node to be neither master nor data node, but -# to act as a "search load balancer" (fetching data from nodes, -# aggregating results, etc.) -# -# node.master: false -# node.data: false - -# Use the Cluster Health API [http://localhost:9200/_cluster/health], the -# Node Info API [http://localhost:9200/_cluster/nodes] or GUI tools -# such as and -# to inspect the cluster state. - -# A node can have generic attributes associated with it, which can later be used -# for customized shard allocation filtering, or allocation awareness. An attribute -# is a simple key value pair, similar to node.key: value, here is an example: -# -# node.rack: rack314 - -# By default, multiple nodes are allowed to start from the same installation location -# to disable it, set the following: -# node.max_local_storage_nodes: 1 - - -#################################### Index #################################### - -# You can set a number of options (such as shard/replica options, mapping -# or analyzer definitions, translog settings, ...) for indices globally, -# in this file. -# -# Note, that it makes more sense to configure index settings specifically for -# a certain index, either when creating it or by using the index templates API. -# -# See and -# -# for more information. - -# Set the number of shards (splits) of an index (5 by default): -# -# index.number_of_shards: 5 - -# Set the number of replicas (additional copies) of an index (1 by default): -# -# index.number_of_replicas: 1 - -# Note, that for development on a local machine, with small indices, it usually -# makes sense to "disable" the distributed features: -# -# index.number_of_shards: 1 -# index.number_of_replicas: 0 - -# These settings directly affect the performance of index and search operations -# in your cluster. Assuming you have enough machines to hold shards and -# replicas, the rule of thumb is: -# -# 1. Having more *shards* enhances the _indexing_ performance and allows to -# _distribute_ a big index across machines. -# 2. Having more *replicas* enhances the _search_ performance and improves the -# cluster _availability_. -# -# The "number_of_shards" is a one-time setting for an index. -# -# The "number_of_replicas" can be increased or decreased anytime, -# by using the Index Update Settings API. -# -# ElasticSearch takes care about load balancing, relocating, gathering the -# results from nodes, etc. Experiment with different settings to fine-tune -# your setup. - -# Use the Index Status API () to inspect -# the index status. - -# Compress stored fields and term vector. -index.store.compress.stored: true -index.store.compress.tv: true - -indices.memory.index_buffer_size: "33%" - -#################################### Paths #################################### - -# Path to directory containing configuration (this file and logging.yml): -# -# path.conf: /path/to/conf - -# Path to directory where to store index data allocated for this node. -# -# path.data: /path/to/data -# -# Can optionally include more than one location, causing data to be striped across -# the locations (a la RAID 0) on a file level, favouring locations with most free -# space on creation. For example: -# -# path.data: /path/to/data1,/path/to/data2 - -# Path to temporary files: -# -# path.work: /path/to/work - -# Path to log files: -# -# path.logs: /path/to/logs - -# Path to where plugins are installed: -# -# path.plugins: /path/to/plugins - - -#################################### Plugin ################################### - -# If a plugin listed here is not installed for current node, the node will not start. -# -# plugin.mandatory: mapper-attachments,lang-groovy - - -################################### Memory #################################### - -# ElasticSearch performs poorly when JVM starts swapping: you should ensure that -# it _never_ swaps. -# -# Set this property to true to lock the memory: -# -bootstrap.mlockall: true - -# Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set -# to the same value, and that the machine has enough memory to allocate -# for ElasticSearch, leaving enough memory for the operating system itself. -# -# You should also make sure that the ElasticSearch process is allowed to lock -# the memory, eg. by using `ulimit -l unlimited`. - - -############################## Network And HTTP ############################### - -# ElasticSearch, by default, binds itself to the 0.0.0.0 address, and listens -# on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node -# communication. (the range means that if the port is busy, it will automatically -# try the next port). - -# Set the bind address specifically (IPv4 or IPv6): -# -# network.bind_host: 192.168.0.1 - -# Set the address other nodes will use to communicate with this node. If not -# set, it is automatically derived. It must point to an actual IP address. -# -# network.publish_host: 192.168.0.1 - -# Set both 'bind_host' and 'publish_host': -# -# network.host: 192.168.0.1 - -# Set a custom port for the node to node communication (9300 by default): -# -# transport.tcp.port: 9300 - -# Enable compression for all communication between nodes (disabled by default): -# -# transport.tcp.compress: true - -# Set a custom port to listen for HTTP traffic: -# -# http.port: 9200 - -# Set a custom allowed content length: -# -# http.max_content_length: 100mb - -# Disable HTTP completely: -# -# http.enabled: false - - -################################### Gateway ################################### - -# The gateway allows for persisting the cluster state between full cluster -# restarts. Every change to the state (such as adding an index) will be stored -# in the gateway, and when the cluster starts up for the first time, -# it will read its state from the gateway. - -# There are several types of gateway implementations. For more information, -# see . - -# The default gateway type is the "local" gateway (recommended): -# -# gateway.type: local - -# Settings below control how and when to start the initial recovery process on -# a full cluster restart (to reuse as much local data as possible when using shared -# gateway). - -# Allow recovery process after N nodes in a cluster are up: -# -gateway.recover_after_nodes: 5 - -# Set the timeout to initiate the recovery process, once the N nodes -# from previous setting are up (accepts time value): -# -gateway.recover_after_time: 5m - -# Set how many nodes are expected in this cluster. Once these N nodes -# are up (and recover_after_nodes is met), begin recovery process immediately -# (without waiting for recover_after_time to expire): -# -gateway.expected_nodes: 6 - - -############################# Recovery Throttling ############################# - -# These settings allow to control the process of shards allocation between -# nodes during initial recovery, replica allocation, rebalancing, -# or when adding and removing nodes. - -# Set the number of concurrent recoveries happening on a node: -# -# 1. During the initial recovery -# -# cluster.routing.allocation.node_initial_primaries_recoveries: 4 -# -# 2. During adding/removing nodes, rebalancing, etc -# -# cluster.routing.allocation.node_concurrent_recoveries: 2 - -# Set to throttle throughput when recovering (eg. 100mb, by default unlimited): -# -# indices.recovery.max_size_per_sec: 0 - -# Set to limit the number of open concurrent streams when -# recovering a shard from a peer: -# -# indices.recovery.concurrent_streams: 5 - - -################################## Discovery ################################## - -# Discovery infrastructure ensures nodes can be found within a cluster -# and master node is elected. Multicast discovery is the default. - -# Set to ensure a node sees N other master eligible nodes to be considered -# operational within the cluster. Set this option to a higher value (2-4) -# for large clusters (>3 nodes): -# -discovery.zen.minimum_master_nodes: 4 - -# Set the time to wait for ping responses from other nodes when discovering. -# Set this option to a higher value on a slow or congested network -# to minimize discovery failures: -# -# discovery.zen.ping.timeout: 3s - -# See -# for more information. - -# Unicast discovery allows to explicitly control which nodes will be used -# to discover the cluster. It can be used when multicast is not present, -# or to restrict the cluster communication-wise. -# -# 1. Disable multicast discovery (enabled by default): -# -discovery.zen.ping.multicast.enabled: false -# -# 2. Configure an initial list of master nodes in the cluster -# to perform discovery when new nodes (master or data) are started: -# -# discovery.zen.ping.unicast.hosts: ["host1", "host2:port", "host3[portX-portY]"] -# -# Make all master eligible nodes discover nodes. -discovery.zen.ping.unicast.hosts: ["<%= discover_nodes.join("\", \"") %>"] - -# EC2 discovery allows to use AWS EC2 API in order to perform discovery. -# -# You have to install the cloud-aws plugin for enabling the EC2 discovery. -# -# See -# for more information. -# -# See -# for a step-by-step tutorial. - - -################################## Slow Log ################################## - -# Shard level query and fetch threshold logging. - -#index.search.slowlog.threshold.query.warn: 10s -#index.search.slowlog.threshold.query.info: 5s -#index.search.slowlog.threshold.query.debug: 2s -#index.search.slowlog.threshold.query.trace: 500ms - -#index.search.slowlog.threshold.fetch.warn: 1s -#index.search.slowlog.threshold.fetch.info: 800ms -#index.search.slowlog.threshold.fetch.debug: 500ms -#index.search.slowlog.threshold.fetch.trace: 200ms - -#index.indexing.slowlog.threshold.index.warn: 10s -#index.indexing.slowlog.threshold.index.info: 5s -#index.indexing.slowlog.threshold.index.debug: 2s -#index.indexing.slowlog.threshold.index.trace: 500ms - -################################## GC Logging ################################ - -#monitor.jvm.gc.ParNew.warn: 1000ms -#monitor.jvm.gc.ParNew.info: 700ms -#monitor.jvm.gc.ParNew.debug: 400ms - -#monitor.jvm.gc.ConcurrentMarkSweep.warn: 10s -#monitor.jvm.gc.ConcurrentMarkSweep.info: 5s -#monitor.jvm.gc.ConcurrentMarkSweep.debug: 2s