trove/trove/guestagent/strategies/backup/experimental/postgresql_impl.py

258 lines
9.8 KiB
Python

# Copyright (c) 2013 OpenStack Foundation
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import os
import re
import stat
from oslo_log import log as logging
from trove.common import cfg
from trove.common import exception
from trove.common.i18n import _
from trove.common import utils
from trove.guestagent.common import operating_system
from trove.guestagent.common.operating_system import FileMode
from trove.guestagent.datastore.experimental.postgresql.service import PgSqlApp
from trove.guestagent.strategies.backup import base
CONF = cfg.CONF
LOG = logging.getLogger(__name__)
WAL_ARCHIVE_DIR = CONF.postgresql.wal_archive_location
class PgDump(base.BackupRunner):
"""Implementation of Backup Strategy for pg_dump."""
__strategy_name__ = 'pg_dump'
@property
def cmd(self):
cmd = 'sudo -u postgres pg_dumpall '
return cmd + self.zip_cmd + self.encrypt_cmd
class PgBaseBackupUtil(object):
def most_recent_backup_wal(self, pos=0):
"""
Return the WAL file for the most recent backup
"""
mrb_file = self.most_recent_backup_file(pos=pos)
# just return the first part of the filename
return mrb_file.split('.')[0]
def most_recent_backup_file(self, pos=0):
"""
Look for the most recent .backup file that basebackup creates
:return: a string like 000000010000000000000006.00000168.backup
"""
walre = re.compile("[0-9A-F]{24}.*.backup")
wal_files = [wal_file for wal_file in os.listdir(WAL_ARCHIVE_DIR)
if walre.search(wal_file)]
wal_files = sorted(wal_files, reverse=True)
if not wal_files:
return None
return wal_files[pos]
def log_files_since_last_backup(self, pos=0):
"""Return the WAL files since the provided last backup
pg_archivebackup depends on alphanumeric sorting to decide wal order,
so we'll do so too:
https://github.com/postgres/postgres/blob/REL9_4_STABLE/contrib
/pg_archivecleanup/pg_archivecleanup.c#L122
"""
last_wal = self.most_recent_backup_wal(pos=pos)
walre = re.compile("^[0-9A-F]{24}$")
wal_files = [wal_file for wal_file in os.listdir(WAL_ARCHIVE_DIR)
if walre.search(wal_file) and wal_file >= last_wal]
return wal_files
class PgBaseBackup(base.BackupRunner, PgBaseBackupUtil):
"""Base backups are taken with the pg_basebackup filesystem-level backup
tool pg_basebackup creates a copy of the binary files in the PostgreSQL
cluster data directory and enough WAL segments to allow the database to
be brought back to a consistent state. Associated with each backup is a
log location, normally indicated by the WAL file name and the position
inside the file.
"""
__strategy_name__ = 'pg_basebackup'
def __init__(self, *args, **kwargs):
self._app = None
super(PgBaseBackup, self).__init__(*args, **kwargs)
self.label = None
self.stop_segment = None
self.start_segment = None
self.start_wal_file = None
self.stop_wal_file = None
self.checkpoint_location = None
self.mrb = None
@property
def app(self):
if self._app is None:
self._app = self._build_app()
return self._app
def _build_app(self):
return PgSqlApp()
@property
def cmd(self):
cmd = ("pg_basebackup -h %s -U %s --pgdata=-"
" --label=%s --format=tar --xlog " %
(self.app.pgsql_run_dir, self.app.ADMIN_USER,
self.base_filename))
return cmd + self.zip_cmd + self.encrypt_cmd
def base_backup_metadata(self, metadata_file):
"""Parse the contents of the .backup file"""
metadata = {}
operating_system.chmod(
metadata_file, FileMode(add=[stat.S_IROTH]), as_root=True)
start_re = re.compile("START WAL LOCATION: (.*) \(file (.*)\)")
stop_re = re.compile("STOP WAL LOCATION: (.*) \(file (.*)\)")
checkpt_re = re.compile("CHECKPOINT LOCATION: (.*)")
label_re = re.compile("LABEL: (.*)")
metadata_contents = operating_system.read_file(metadata_file)
match = start_re.search(metadata_contents)
if match:
self.start_segment = match.group(1)
metadata['start-segment'] = self.start_segment
self.start_wal_file = match.group(2)
metadata['start-wal-file'] = self.start_wal_file
match = stop_re.search(metadata_contents)
if match:
self.stop_segment = match.group(1)
metadata['stop-segment'] = self.stop_segment
self.stop_wal_file = match.group(2)
metadata['stop-wal-file'] = self.stop_wal_file
match = checkpt_re.search(metadata_contents)
if match:
self.checkpoint_location = match.group(1)
metadata['checkpoint-location'] = self.checkpoint_location
match = label_re.search(metadata_contents)
if match:
self.label = match.group(1)
metadata['label'] = self.label
return metadata
def check_process(self):
# If any of the below variables were not set by either metadata()
# or direct retrieval from the pgsql backup commands, then something
# has gone wrong
if not self.start_segment or not self.start_wal_file:
LOG.info(_("Unable to determine starting WAL file/segment"))
return False
if not self.stop_segment or not self.stop_wal_file:
LOG.info(_("Unable to determine ending WAL file/segment"))
return False
if not self.label:
LOG.info(_("No backup label found"))
return False
return True
def metadata(self):
"""pg_basebackup may complete, and we arrive here before the
history file is written to the wal archive. So we need to
handle two possibilities:
- this is the first backup, and no history file exists yet
- this isn't the first backup, and so the history file we retrieve
isn't the one we just ran!
"""
def _metadata_found():
LOG.debug("Polling for backup metadata... ")
self.mrb = self.most_recent_backup_file()
if not self.mrb:
LOG.debug("No history files found!")
return False
metadata = self.base_backup_metadata(
os.path.join(WAL_ARCHIVE_DIR, self.mrb))
LOG.debug("Label to pg_basebackup: %(base_filename)s "
"label found: %(label)s",
{'base_filename': self.base_filename,
'label': metadata['label']})
LOG.info(_("Metadata for backup: %s."), str(metadata))
return metadata['label'] == self.base_filename
try:
utils.poll_until(_metadata_found, sleep_time=5, time_out=60)
except exception.PollTimeOut:
raise RuntimeError(_("Timeout waiting for backup metadata for"
" backup %s") % self.base_filename)
return self.base_backup_metadata(
os.path.join(WAL_ARCHIVE_DIR, self.mrb))
def _run_post_backup(self):
"""Get rid of WAL data we don't need any longer"""
arch_cleanup_bin = os.path.join(self.app.pgsql_extra_bin_dir,
"pg_archivecleanup")
bk_file = os.path.basename(self.most_recent_backup_file())
cmd_full = " ".join((arch_cleanup_bin, WAL_ARCHIVE_DIR, bk_file))
utils.execute("sudo", "su", "-", self.app.pgsql_owner, "-c",
"%s" % cmd_full)
class PgBaseBackupIncremental(PgBaseBackup):
"""To restore an incremental backup from a previous backup, in PostgreSQL,
is effectively to replay the WAL entries to a designated point in time.
All that is required is the most recent base backup, and all WAL files
"""
def __init__(self, *args, **kwargs):
if (not kwargs.get('parent_location') or
not kwargs.get('parent_checksum')):
raise AttributeError(_('Parent missing!'))
super(PgBaseBackupIncremental, self).__init__(*args, **kwargs)
self.parent_location = kwargs.get('parent_location')
self.parent_checksum = kwargs.get('parent_checksum')
def _run_pre_backup(self):
self.backup_label = self.base_filename
self.start_segment = self.app.pg_start_backup(self.backup_label)
self.start_wal_file = self.app.pg_xlogfile_name(self.start_segment)
self.stop_segment = self.app.pg_stop_backup()
# We have to hack this because self.command is
# initialized in the base class before we get here, which is
# when we will know exactly what WAL files we want to archive
self.command = self._cmd()
def _cmd(self):
wal_file_list = self.log_files_since_last_backup(pos=1)
cmd = 'sudo tar -cf - -C {wal_dir} {wal_list} '.format(
wal_dir=WAL_ARCHIVE_DIR,
wal_list=" ".join(wal_file_list))
return cmd + self.zip_cmd + self.encrypt_cmd
def metadata(self):
_meta = super(PgBaseBackupIncremental, self).metadata()
_meta.update({
'parent_location': self.parent_location,
'parent_checksum': self.parent_checksum,
})
return _meta