From df0b7e5737c5dc17e1f4d5eaf40fd67311ec7007 Mon Sep 17 00:00:00 2001 From: Eric K Date: Tue, 19 Feb 2019 13:01:23 -0800 Subject: [PATCH] Resolve test_datasource_db_sync_add_remove instability Configure the replica server in the test to use a short RPC timeout. The instability appears to arise as follows. In starting the replica Congress instance: If Congress API process starts before Congress policy engine process, the call to check Congress started is processed by the API process, which makes an RPC call to the policy engine process. But the policy engine process may be a bit slower to start. In that case, the RPC call may never reach the policy engine process even if the policy engine process starts up just a few seconds later. So the call times out according to the default 60 seconds, by which point the entire retry period of the test is used up and the test fails. In this fix, the RPC timeout is set to a short 10 seconds on the replica instance. That way, if the policy engine has not started by the time of the first RPC call, the RPC call would time out with time remaining on the test retry period for further retry. The test retry period is also increased because occasionally the replica policy engine takes longer than the allotted 60s to start up. 
Change-Id: Ied8c95c4a60a3b6ea2c617fdfe94d4d7cf7a5822 --- congress_tempest_plugin/tests/scenario/congress_ha/test_ha.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/congress_tempest_plugin/tests/scenario/congress_ha/test_ha.py b/congress_tempest_plugin/tests/scenario/congress_ha/test_ha.py index 58a4a75..8873873 100644 --- a/congress_tempest_plugin/tests/scenario/congress_ha/test_ha.py +++ b/congress_tempest_plugin/tests/scenario/congress_ha/test_ha.py @@ -78,6 +78,7 @@ class TestHA(manager_congress.ScenarioPolicyBase): index = conf.find('[DEFAULT]') + len('[DEFAULT]\n') conf = (conf[:index] + 'bind_port = %d\n' % port_num + + 'rpc_response_timeout = 10\n' + conf[index:]) # set datasource sync period interval to 5 conf = conf.replace('datasource_sync_period = 30', @@ -212,7 +213,7 @@ class TestHA(manager_congress.ScenarioPolicyBase): if not test_utils.call_until_true( func=lambda: self._check_replica_server_status( replica_client), - duration=60, sleep_for=1): + duration=90, sleep_for=2): for port_num in self.replicas: procs = self.replicas[port_num][0] for service_key in procs: