Redis should perform backup using BGSAVE not SAVE

When Redis backup was implemented, the save() command was chosen
because it blocks, making the code simpler to read and understand.
Unfortunately, SAVE and BGSAVE have differences beyond one being
synchronous and the other asynchronous: SAVE also blocks client
connections while it runs. This behaviour is not optimal, so the
code was changed to use BGSAVE and wait until it completes.

In the case where bgsave() fails (possibly due to memory issues),
save() is called so that the backup will still complete.
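
For illustration, a minimal sketch of this BGSAVE-then-poll approach
using a plain redis-py client. The persist() helper, its timeout and
its sleep interval are illustrative stand-ins for the Trove code in
the diff below, not its actual API:

    import time

    import redis

    TIME_OUT = 1200  # seconds; mirrors the module constant in the diff


    def persist(client, time_out=TIME_OUT, sleep_time=2):
        """Snapshot Redis data via BGSAVE, falling back to SAVE."""
        last_save = client.lastsave()
        if client.bgsave():
            # BGSAVE returns immediately; the snapshot is done once
            # the LASTSAVE timestamp moves past the one recorded above.
            deadline = time.time() + time_out
            while client.lastsave() == last_save:
                if time.time() > deadline:
                    raise RuntimeError("Timeout waiting for BGSAVE")
                time.sleep(sleep_time)
        elif not client.save():
            # SAVE blocks all client connections while it runs, so it
            # is only used if the background save could not start.
            raise RuntimeError("Could not persist Redis data")


    persist(redis.StrictRedis(host='localhost', port=6379))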

The Redis helper was also modified to allow adding more data (to
increase the time the backup takes), and the tests were rearranged
to improve their chances of success.
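
As a sketch of that helper change: writing each value under several
key patterns multiplies the stored data, so the snapshot takes long
enough for the "while backup is running" tests to observe it. The
patterns match those added to RedisHelper below; the connection
details and sizes are arbitrary illustration values:

    import redis

    client = redis.StrictRedis(host='localhost', port=6379)

    key_patterns = ['user_a:%s', 'user_b:%s']  # as added to RedisHelper
    value_pattern = 'id:%s'
    data_start, data_size = 0, 100000  # illustrative values only

    for num in range(data_start, data_start + data_size):
        for key_pattern in key_patterns:
            client.set(key_pattern % num, value_pattern % num)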

Change-Id: I75d9df54b8550b08ce0112e516c17fd717ebb857
Closes-Bug: #1538334
Peter Stachowski 2016-01-26 18:56:02 -05:00
parent 45d1936e32
commit eecca3f9e8
3 changed files with 69 additions and 43 deletions


@@ -34,7 +34,7 @@ from trove.guestagent.datastore import service
 from trove.guestagent import pkg
 
 LOG = logging.getLogger(__name__)
-TIME_OUT = 1200
+TIME_OUT = 1200  # FIXME(pmalik): should probably use config timeout
 CONF = cfg.CONF
 CLUSTER_CFG = 'clustering'
 packager = pkg.Package()
@@ -421,7 +421,29 @@ class RedisAdmin(object):
         return self.__client.info(section=section)
 
     def persist_data(self):
-        return self.__client.save()
+        save_cmd = 'SAVE'
+        last_save = self.__client.lastsave()
+        LOG.debug("Starting Redis data persist")
+        if self.__client.bgsave():
+            save_cmd = 'BGSAVE'
+
+            def _timestamp_changed():
+                return last_save != self.__client.lastsave()
+
+            try:
+                utils.poll_until(_timestamp_changed, sleep_time=2,
+                                 time_out=TIME_OUT)
+            except exception.PollTimeOut:
+                raise RuntimeError(_("Timeout occurred waiting for Redis "
+                                     "persist (%s) to complete.") % save_cmd)
+
+        # If the background save fails for any reason, try doing a foreground
+        # one. This blocks client connections, so we don't want it to be
+        # the default.
+        elif not self.__client.save():
+            raise exception.BackupCreationError(_("Could not persist "
+                                                  "Redis data (%s)") % save_cmd)
+        LOG.debug("Redis data persist (%s) completed" % save_cmd)
 
     def set_master(self, host=None, port=None):
         self.__client.slaveof(host, port)


@@ -62,27 +62,6 @@ class BackupGroup(TestGroup):
-    @test(groups=[GROUP_BACKUP],
-          depends_on=[backup_create])
-    def restore_instance_from_not_completed_backup(self):
-        """Ensure a restore fails while the backup is running."""
-        self.test_runner.run_restore_instance_from_not_completed_backup()
-
-    @test(groups=[GROUP_BACKUP],
-          depends_on=[backup_create],
-          runs_after=[restore_instance_from_not_completed_backup])
-    def instance_action_right_after_backup_create(self):
-        """Ensure any instance action fails while backup is running."""
-        self.test_runner.run_instance_action_right_after_backup_create()
-
-    @test(groups=[GROUP_BACKUP],
-          depends_on=[backup_create],
-          runs_after=[instance_action_right_after_backup_create])
-    def backup_create_another_backup_running(self):
-        """Ensure create backup fails when another backup is running."""
-        self.test_runner.run_backup_create_another_backup_running()
-
-    @test(groups=[GROUP_BACKUP],
-          depends_on=[backup_create],
-          runs_after=[backup_create_another_backup_running])
+    @test(groups=[GROUP_BACKUP],
+          depends_on=[backup_create])
     def backup_delete_while_backup_running(self):
         """Ensure delete backup fails while it is running."""
         self.test_runner.run_backup_delete_while_backup_running()
@@ -90,12 +69,31 @@ class BackupGroup(TestGroup):
-    @test(groups=[GROUP_BACKUP],
-          depends_on=[backup_create],
-          runs_after=[backup_delete_while_backup_running])
+    @test(groups=[GROUP_BACKUP],
+          depends_on=[backup_create],
+          runs_after=[backup_delete_while_backup_running])
+    def restore_instance_from_not_completed_backup(self):
+        """Ensure a restore fails while the backup is running."""
+        self.test_runner.run_restore_instance_from_not_completed_backup()
+
+    @test(groups=[GROUP_BACKUP],
+          depends_on=[backup_create],
+          runs_after=[restore_instance_from_not_completed_backup])
+    def backup_create_another_backup_running(self):
+        """Ensure create backup fails when another backup is running."""
+        self.test_runner.run_backup_create_another_backup_running()
+
+    @test(groups=[GROUP_BACKUP],
+          depends_on=[backup_create],
+          runs_after=[backup_create_another_backup_running])
+    def instance_action_right_after_backup_create(self):
+        """Ensure any instance action fails while backup is running."""
+        self.test_runner.run_instance_action_right_after_backup_create()
+
+    @test(groups=[GROUP_BACKUP],
+          depends_on=[backup_create],
+          runs_after=[instance_action_right_after_backup_create])
     def backup_create_completed(self):
         """Check that the backup completes successfully."""
         self.test_runner.run_backup_create_completed()
 
     # TODO(peterstac) - Add support for incremental backups
     @test(groups=[GROUP_BACKUP, GROUP_BACKUP_LIST],
           depends_on=[backup_create_completed])
     def backup_list(self):


@@ -25,7 +25,7 @@ class RedisHelper(TestHelper):
     def __init__(self, expected_override_name):
         super(RedisHelper, self).__init__(expected_override_name)
 
-        self.key_pattern = 'user:%s'
+        self.key_patterns = ['user_a:%s', 'user_b:%s']
         self.value_pattern = 'id:%s'
         self.label_value = 'value_set'
@@ -40,12 +40,11 @@ class RedisHelper(TestHelper):
         # (TIME_WAIT) before the port can be released.
         # This is a feature of the operating system that helps it dealing with
         # packets that arrive after the connection is closed.
-        if host in self._ds_client_cache:
-            return self._ds_client_cache[host]
-
-        client = self.create_client(host, *args, **kwargs)
-        self._ds_client_cache[host] = client
-        return client
+        if host not in self._ds_client_cache:
+            self._ds_client_cache[host] = (
+                self.create_client(host, *args, **kwargs))
+        return self._ds_client_cache[host]
 
     def create_client(self, host, *args, **kwargs):
         user = self.get_helper_credentials()
@@ -53,15 +52,17 @@ class RedisHelper(TestHelper):
         return client
 
     # Add data overrides
+    # We use multiple keys to make the Redis backup take longer
     def add_actual_data(self, data_label, data_start, data_size, host,
                         *args, **kwargs):
         test_set = self._get_data_point(host, data_label, *args, **kwargs)
         if not test_set:
             for num in range(data_start, data_start + data_size):
-                self._set_data_point(
-                    host,
-                    self.key_pattern % str(num), self.value_pattern % str(num),
-                    *args, **kwargs)
+                for key_pattern in self.key_patterns:
+                    self._set_data_point(
+                        host,
+                        key_pattern % str(num), self.value_pattern % str(num),
+                        *args, **kwargs)
 
             # now that the data is there, add the label
             self._set_data_point(
                 host,
@@ -113,13 +114,15 @@ class RedisHelper(TestHelper):
             raise ex
 
     # Remove data overrides
+    # We use multiple keys to make the Redis backup take longer
    def remove_actual_data(self, data_label, data_start, data_size, host,
                           *args, **kwargs):
         test_set = self._get_data_point(host, data_label, *args, **kwargs)
         if test_set:
             for num in range(data_start, data_start + data_size):
-                self._expire_data_point(host, self.key_pattern % str(num),
-                                        *args, **kwargs)
+                for key_pattern in self.key_patterns:
+                    self._expire_data_point(host, key_pattern % str(num),
+                                            *args, **kwargs)
 
             # now that the data is gone, remove the label
             self._expire_data_point(host, data_label, *args, **kwargs)
@@ -131,6 +134,7 @@ class RedisHelper(TestHelper):
             host, expire_point, [key], *args, **kwargs)
 
     # Verify data overrides
+    # We use multiple keys to make the Redis backup take longer
     def verify_actual_data(self, data_label, data_start, data_size, host,
                            *args, **kwargs):
         # make sure the data is there - tests edge cases and a random one
@@ -145,15 +149,17 @@ class RedisHelper(TestHelper):
                     random_num,
                     data_start + data_size - 2,
                     data_start + data_size - 1]:
-            self._verify_data_point(host,
-                                    self.key_pattern % num,
-                                    self.value_pattern % num,
-                                    *args, **kwargs)
+            for key_pattern in self.key_patterns:
+                self._verify_data_point(host,
+                                        key_pattern % num,
+                                        self.value_pattern % num,
+                                        *args, **kwargs)
 
         # negative tests
         for num in [data_start - 1,
                     data_start + data_size]:
-            self._verify_data_point(host, self.key_pattern % num, None,
-                                    *args, **kwargs)
+            for key_pattern in self.key_patterns:
+                self._verify_data_point(host, key_pattern % num, None,
+                                        *args, **kwargs)
 
     def _verify_data_point(self, host, key, expected_value, *args, **kwargs):
         value = self._get_data_point(host, key, *args, **kwargs)