From 8015d9a365410efa5ef6d952ed33364b1679c0ef Mon Sep 17 00:00:00 2001 From: Nicolas Bock Date: Wed, 1 Apr 2020 14:56:16 -0600 Subject: [PATCH] Add config parameters to tune mnesia settings When a RabbitMQ cluster is restarted, the Mnesia settings determine how long and how often each broker will try to connect to the cluster before giving up. It might be useful for an operator to be able to tune these parameters. This change adds two settings, `mnesia-table-loading-retry-timeout` and `mnesia-table-loading-retry-limit`, which set these parameters in the rabbitmq.config file [1]. [1] https://www.rabbitmq.com/configure.html#config-items Change-Id: I96aa8c4061aed47eb2e844d1bec44fafd379ac25 Partial-Bug: #1828988 Related-Bug: #1874075 Co-authored-by: Nicolas Bock Co-authored-by: Aurelien Lourot --- config.yaml | 17 +++++++++++++++ hooks/rabbitmq_context.py | 6 +++++- templates/rabbitmq.config | 6 ++++++ unit_tests/test_rabbitmq_context.py | 32 ++++++++++++++++++++++------- 4 files changed, 53 insertions(+), 8 deletions(-) diff --git a/config.yaml b/config.yaml index 9577d5ec..287b9ea3 100644 --- a/config.yaml +++ b/config.yaml @@ -59,6 +59,23 @@ options: cluster. . For more information see http://www.rabbitmq.com/partitions.html + mnesia-table-loading-retry-timeout: + type: int + default: 30000 + description: | + Timeout in milliseconds used when waiting for Mnesia tables in a + cluster to become available. + . + https://www.rabbitmq.com/configure.html#config-items + mnesia-table-loading-retry-limit: + type: int + default: 10 + description: | + Number of retries when waiting for Mnesia tables during cluster startup. + Note that this setting is not applied to Mnesia upgrades or node + deletions. + .
+ https://www.rabbitmq.com/configure.html#config-items use-syslog: type: boolean default: False diff --git a/hooks/rabbitmq_context.py b/hooks/rabbitmq_context.py index 90853e22..95da30f6 100644 --- a/hooks/rabbitmq_context.py +++ b/hooks/rabbitmq_context.py @@ -181,7 +181,11 @@ class RabbitMQClusterContext(object): def __call__(self): ctxt = {'cluster_partition_handling': - config('cluster-partition-handling')} + config('cluster-partition-handling'), + 'mnesia_table_loading_retry_timeout': + config('mnesia-table-loading-retry-timeout'), + 'mnesia_table_loading_retry_limit': + config('mnesia-table-loading-retry-limit')} if config('connection-backlog'): ctxt['connection_backlog'] = config('connection-backlog') diff --git a/templates/rabbitmq.config b/templates/rabbitmq.config index 72998ce8..a4106f55 100644 --- a/templates/rabbitmq.config +++ b/templates/rabbitmq.config @@ -34,6 +34,12 @@ {%- if queue_master_locator %} {queue_master_locator, <<"{{ queue_master_locator }}">>}, {%- endif %} +{%- if mnesia_table_loading_retry_timeout %} + {mnesia_table_loading_retry_timeout, {{ mnesia_table_loading_retry_timeout }}}, +{%- endif %} +{%- if mnesia_table_loading_retry_limit %} + {mnesia_table_loading_retry_limit, {{ mnesia_table_loading_retry_limit }}}, +{%- endif %} {%- if cluster_partition_handling %} {cluster_partition_handling, {{ cluster_partition_handling }}} {%- endif %} diff --git a/unit_tests/test_rabbitmq_context.py b/unit_tests/test_rabbitmq_context.py index d3a61a38..0072bb72 100644 --- a/unit_tests/test_rabbitmq_context.py +++ b/unit_tests/test_rabbitmq_context.py @@ -89,6 +89,8 @@ class TestRabbitMQClusterContext(unittest.TestCase): def test_context_ssl_off(self, config, mock_cmp_pkgrevno): config_data = {'cluster-partition-handling': 'ignore', 'connection-backlog': 200, + 'mnesia-table-loading-retry-timeout': 25000, + 'mnesia-table-loading-retry-limit': 12, 'queue-master-locator': 'client-local'} config.side_effect = config_data.get 
mock_cmp_pkgrevno.return_value = 0 @@ -96,19 +98,26 @@ class TestRabbitMQClusterContext(unittest.TestCase): self.assertEqual( rabbitmq_context.RabbitMQClusterContext().__call__(), { 'cluster_partition_handling': "ignore", + 'mnesia_table_loading_retry_timeout': 25000, + 'mnesia_table_loading_retry_limit': 12, 'connection_backlog': 200, 'queue_master_locator': 'client-local', }) - config.assert_has_calls([mock.call("cluster-partition-handling"), - mock.call("connection-backlog")], - mock.call('queue-master-locator')) + config.assert_has_calls( + [mock.call("cluster-partition-handling"), + mock.call("mnesia-table-loading-retry-timeout"), + mock.call("mnesia-table-loading-retry-limit"), + mock.call("connection-backlog")], + mock.call('queue-master-locator')) @mock.patch.object(rabbitmq_context, 'cmp_pkgrevno') @mock.patch("rabbitmq_context.config") def test_queue_master_locator_min_masters(self, config, mock_cmp_pkgrevno): config_data = {'cluster-partition-handling': 'ignore', 'connection-backlog': 200, + 'mnesia-table-loading-retry-timeout': 25000, + 'mnesia-table-loading-retry-limit': 12, 'queue-master-locator': 'min-masters'} config.side_effect = config_data.get mock_cmp_pkgrevno.return_value = 0 @@ -117,6 +126,8 @@ class TestRabbitMQClusterContext(unittest.TestCase): rabbitmq_context.RabbitMQClusterContext().__call__(), { 'cluster_partition_handling': "ignore", 'connection_backlog': 200, + 'mnesia_table_loading_retry_timeout': 25000, + 'mnesia_table_loading_retry_limit': 12, 'queue_master_locator': 'min-masters', }) @@ -128,8 +139,10 @@ class TestRabbitMQClusterContext(unittest.TestCase): @mock.patch("rabbitmq_context.config") def test_rabbit_server_3pt6(self, config, mock_cmp_pkgrevno): config_data = {'cluster-partition-handling': 'ignore', - 'queue-master-locator': 'min-masters', - 'connection-backlog': 200} + 'connection-backlog': 200, + 'mnesia-table-loading-retry-timeout': 25000, + 'mnesia-table-loading-retry-limit': 12, + 'queue-master-locator': 
'min-masters'} config.side_effect = config_data.get mock_cmp_pkgrevno.return_value = -1 @@ -137,10 +150,15 @@ class TestRabbitMQClusterContext(unittest.TestCase): rabbitmq_context.RabbitMQClusterContext().__call__(), { 'cluster_partition_handling': "ignore", 'connection_backlog': 200, + 'mnesia_table_loading_retry_timeout': 25000, + 'mnesia_table_loading_retry_limit': 12, }) - config.assert_has_calls([mock.call("cluster-partition-handling"), - mock.call("connection-backlog")]) + config.assert_has_calls( + [mock.call("cluster-partition-handling"), + mock.call("mnesia-table-loading-retry-timeout"), + mock.call("mnesia-table-loading-retry-limit"), + mock.call("connection-backlog")]) assert mock.call('queue-master-locator') not in config.mock_calls