Add configurable monitor timeouts for ovn dbs

Under pressure, the default monitor timeout value of 20 seconds is not enough to prevent unnecessary failovers of the ovn-dbs pacemaker resource. While spawning a few VMs at the same time, this could lead to unnecessary movements of the master DB, then re-connections of ovn-controllers (slaves are read-only), further peaks of load on the DBs, and in the end to a snowball effect. This value can now be configured via dbs_timeout in tripleo::profile::pacemaker::ovn_dbs_bundle and defaults to 60s. Change-Id: Ib95c6b7614631eed264d42e6cf61672b705e7893 Signed-off-by: Kamil Sambor <ksambor@redhat.com> Partial-Bug: #1853000 (cherry picked from commit 15e21010a8) (cherry picked from commit 223e786c57)
2019-10-17 15:30:58 +02:00 · 2019-10-17 15:30:58 +02:00 · 2631cb8c54
parent c0d16b9485
commit 2631cb8c54
2 changed files with 17 additions and 1 deletions
--- a/manifests/profile/pacemaker/ovn_dbs_bundle.pp
+++ b/manifests/profile/pacemaker/ovn_dbs_bundle.pp
@ -60,6 +60,10 @@
 #   (optional) Sets PCMK_tls_priorities in /etc/sysconfig/pacemaker when set
 #   Defaults to hiera('tripleo::pacemaker::tls_priorities', undef)
 #
+# [*dbs_timeout*]
+#   (optional) Timeout, in seconds, for the monitor operation of the ovn dbs resource (Master and Slave roles)
+#   Defaults to 60
+#

 class tripleo::profile::pacemaker::ovn_dbs_bundle (
  $ovn_dbs_docker_image = hiera('tripleo::profile::pacemaker::ovn_dbs_bundle::ovn_dbs_docker_image', undef),
@ -72,6 +76,7 @@ class tripleo::profile::pacemaker::ovn_dbs_bundle (
  $sb_db_port           = 6642,
  $container_backend    = 'docker',
  $tls_priorities       = hiera('tripleo::pacemaker::tls_priorities', undef),
+  $dbs_timeout          = hiera('tripleo::profile::pacemaker::ovn_dbs_bundle::dbs_timeout', 60),
 ) {

  if $::hostname == downcase($bootstrap_node) {
@ -153,7 +158,8 @@ class tripleo::profile::pacemaker::ovn_dbs_bundle (
      pacemaker::resource::ocf { "${ovndb_servers_resource_name}":
        ocf_agent_name  => "${ovndb_servers_ocf_name}",
        master_params   => '',
-        op_params       => 'start timeout=200s stop timeout=200s',
+        op_params       => "start timeout=200s stop timeout=200s monitor interval=10s role=Master timeout=${dbs_timeout}s \
+monitor interval=30s role=Slave timeout=${dbs_timeout}s",
        resource_params => "master_ip=${ovn_dbs_vip_norm} nb_master_port=${nb_db_port} \
 sb_master_port=${sb_db_port} manage_northd=yes inactive_probe_interval=180000",
        tries           => $pcs_tries,
--- a/releasenotes/notes/setup_timeouts_ovn_dbs-630a7ccfda5976a5.yaml
+++ b/releasenotes/notes/setup_timeouts_ovn_dbs-630a7ccfda5976a5.yaml
@ -0,0 +1,10 @@
+---
+features:
+  - |
+    Under pressure, the default monitor timeout value of 20 seconds is not
+    enough to prevent unnecessary failovers of the ovn-dbs pacemaker resource.
+    While spawning a few VMs at the same time, this could lead to unnecessary
+    movements of the master DB, then re-connections of ovn-controllers (slaves
+    are read-only), further peaks of load on the DBs, and in the end to a
+    snowball effect. This value can now be configured via dbs_timeout in
+    tripleo::profile::pacemaker::ovn_dbs_bundle and defaults to 60s.