diff --git a/ceph/utils.py b/ceph/utils.py
index a6e827a..0f9d23a 100644
--- a/ceph/utils.py
+++ b/ceph/utils.py
@@ -1805,25 +1805,28 @@ def wait_on_previous_node(upgrade_key, service, previous_node, version):
         previous_node_start_time = monitor_key_get(
             upgrade_key,
             "{}_{}_{}_start".format(service, previous_node, version))
-        if (current_timestamp - (10 * 60)) > previous_node_start_time:
-            # Previous node is probably dead. Lets move on
-            if previous_node_start_time is not None:
-                log(
-                    "Waited 10 mins on node {}. current time: {} > "
-                    "previous node start time: {} Moving on".format(
-                        previous_node,
-                        (current_timestamp - (10 * 60)),
-                        previous_node_start_time))
-                return
-        else:
-            # I have to wait. Sleep a random amount of time and then
-            # check if I can lock,upgrade and roll.
-            wait_time = random.randrange(5, 30)
-            log('waiting for {} seconds'.format(wait_time))
-            time.sleep(wait_time)
-            previous_node_finished = monitor_key_exists(
-                upgrade_key,
-                "{}_{}_{}_done".format(service, previous_node, version))
+        if (previous_node_start_time is not None and
+                ((current_timestamp - (10 * 60)) >
+                 float(previous_node_start_time))):
+            # NOTE(jamespage):
+            # Previous node is probably dead as we've been waiting
+            # for 10 minutes - lets move on and upgrade
+            log("Waited 10 mins on node {}. current time: {} > "
+                "previous node start time: {} Moving on".format(
+                    previous_node,
+                    (current_timestamp - (10 * 60)),
+                    previous_node_start_time))
+            return
+        # NOTE(jamespage)
+        # Previous node has not started, or started less than
+        # 10 minutes ago - sleep a random amount of time and
+        # then check again.
+        wait_time = random.randrange(5, 30)
+        log('waiting for {} seconds'.format(wait_time))
+        time.sleep(wait_time)
+        previous_node_finished = monitor_key_exists(
+            upgrade_key,
+            "{}_{}_{}_done".format(service, previous_node, version))
 
 
 def get_upgrade_position(osd_sorted_list, match_name):
diff --git a/unit_tests/test_mon_upgrade_roll.py b/unit_tests/test_mon_upgrade_roll.py
index f1ac310..6a763ba 100644
--- a/unit_tests/test_mon_upgrade_roll.py
+++ b/unit_tests/test_mon_upgrade_roll.py
@@ -46,7 +46,10 @@ def monitor_key_side_effect(*args):
     elif args[1] == \
             'mon_ip-192-168-1-2_0.94.1_start':
         # Return that the previous node started 9 minutes ago
-        return previous_node_start_time
+        # NOTE(jamespage):
+        # Pass back as string as this is what we actually get
+        # from the monitor cluster
+        return str(previous_node_start_time)
 
 
 class UpgradeRollingTestCase(unittest.TestCase):
@@ -292,4 +295,4 @@ class UpgradeRollingTestCase(unittest.TestCase):
             [call('Previous node is: ip-192-168-1-2')],
             [call('ip-192-168-1-2 is not finished. Waiting')],
         )
-        self.assertEqual(tval[0], previous_node_start_time + 700)
+        self.assertGreaterEqual(tval[0], previous_node_start_time + 600)