From 401c05413c9f0cf42f1fd2c4a4c65cd1b7688be5 Mon Sep 17 00:00:00 2001 From: SamYaple Date: Fri, 15 Jan 2016 23:01:37 +0000 Subject: [PATCH] Add initial backup code Updates backup.py and adds dump_manifest.py, which prints all hashes from the underlying blocks. This can be replicated with a simple Python script as follows: # dump_disk.py import hashlib with open('/dev/loop0', 'rb') as f: while True: data = f.read(4*1024**2) if not data: break print(hashlib.sha1(data).hexdigest()) python dump_disk.py | grep -v 2bccbd2f38f15c13eb7d5a89fd9d85f595e23bc3 NOTE: 2bccbd2f38f15c13eb7d5a89fd9d85f595e23bc3 == hash of a segment of all zero bytes To perform a backup and dump info from the manifest (which will match the output of the above script): tools/backup.py --backup /dev/loop0 --manifest mani tools/dump_manifest.py --manifest mani The backup works on both files and block devices with the raw driver. Further drivers will be implemented (such as qcow2 and rbd) as time goes on. Store the disk size in bytes (size_of_disk) instead of a sector count. 
Change-Id: I976f02a27bc13b5774a6088799ca61f65ec04f14 --- ekko/backup/__init__.py | 0 ekko/backup/_backend/__init__.py | 0 ekko/backup/_backend/raw.py | 29 ++++++ ekko/backup/_compression/__init__.py | 0 ekko/backup/_encryption/__init__.py | 0 ekko/backup/backend.py | 45 +++++++++ ekko/manifest/_drivers/osdk.py | 2 +- ekko/manifest/_drivers/sqlite.py | 15 ++- ekko/manifest/structure.py | 4 +- setup.cfg | 3 + tools/backup.py | 138 ++++++++++++++++----------- tools/dump_manifest.py | 54 +++++++++++ tools/generate_manifest.py | 3 +- 13 files changed, 226 insertions(+), 67 deletions(-) create mode 100644 ekko/backup/__init__.py create mode 100644 ekko/backup/_backend/__init__.py create mode 100644 ekko/backup/_backend/raw.py create mode 100644 ekko/backup/_compression/__init__.py create mode 100644 ekko/backup/_encryption/__init__.py create mode 100644 ekko/backup/backend.py mode change 100644 => 100755 tools/backup.py create mode 100755 tools/dump_manifest.py diff --git a/ekko/backup/__init__.py b/ekko/backup/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ekko/backup/_backend/__init__.py b/ekko/backup/_backend/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ekko/backup/_backend/raw.py b/ekko/backup/_backend/raw.py new file mode 100644 index 0000000..78daa79 --- /dev/null +++ b/ekko/backup/_backend/raw.py @@ -0,0 +1,29 @@ +# Copyright 2016 Sam Yaple +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ekko.backup import backend + + +class RawBackup(backend.BaseBackup): + + def get_data(self, reads): + with open(self.backup_location, 'rb') as f: + for start, size in reads: + f.seek(start, 0) + yield (start, f.read(size)) + + def get_size(self): + with open(self.backup_location, 'rb') as f: + f.seek(0, 2) + return f.tell() diff --git a/ekko/backup/_compression/__init__.py b/ekko/backup/_compression/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ekko/backup/_encryption/__init__.py b/ekko/backup/_encryption/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ekko/backup/backend.py b/ekko/backup/backend.py new file mode 100644 index 0000000..467d00e --- /dev/null +++ b/ekko/backup/backend.py @@ -0,0 +1,45 @@ +# Copyright 2016 Sam Yaple +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import abc + +import six + + +@six.add_metaclass(abc.ABCMeta) +class BaseBackup(object): + """Base class for Backup drivers + + :params backup_location: Location of device or file to backup + """ + + def __init__(self, backup_location): + self.backup_location = backup_location + + @abc.abstractmethod + def get_data(self, reads): + """Get data from backing device or file + + :params reads: A list of tuples with the start sector and size of read + :returns: An iterable of tuples with start sector and data + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_size(self): + """Get size of disk to backup + + :returns: Size of disk in bytes + """ + raise NotImplementedError() diff --git a/ekko/manifest/_drivers/osdk.py b/ekko/manifest/_drivers/osdk.py index d2af6ad..f14a16b 100644 --- a/ekko/manifest/_drivers/osdk.py +++ b/ekko/manifest/_drivers/osdk.py @@ -48,7 +48,7 @@ class OSDKManifest(drivers.BaseManifest): '<2IQ24s', metadata.incremental, metadata.segment_size, - metadata.sectors, + metadata.size_of_disk, metadata.timestamp )) for backupset in metadata.backupsets: diff --git a/ekko/manifest/_drivers/sqlite.py b/ekko/manifest/_drivers/sqlite.py index 80530ef..de303ea 100644 --- a/ekko/manifest/_drivers/sqlite.py +++ b/ekko/manifest/_drivers/sqlite.py @@ -69,19 +69,26 @@ class SQLiteManifest(drivers.BaseManifest): return structure.Metadata( incremental=metadata['incremental'], - sectors=metadata['sectors'], + size_of_disk=metadata['size_of_disk'], segment_size=metadata['segment_size'], timestamp=metadata['timestamp'], backupset_id=backupsets[-1], backupsets=backupsets ) - def get_segments(self): + def get_segments(self, metadata): with self.get_conn() as conn: with closing(conn.cursor()) as cur: cur.execute("SELECT * FROM segments") for result in cur: - yield result + yield structure.Segment( + backupset_id=metadata.backupsets[result[5]], + incremental=result[0], + segment=result[1], + compression=result[2], + encryption=result[3], + 
segment_hash=str(result[4]) + ) def put_segments(self, segments, metadata): with self.get_conn() as conn: @@ -108,7 +115,7 @@ class SQLiteManifest(drivers.BaseManifest): [ ('incremental', metadata.incremental), ('segment_size', metadata.segment_size), - ('sectors', metadata.sectors), + ('size_of_disk', metadata.size_of_disk), ('timestamp', metadata.timestamp) ] ) diff --git a/ekko/manifest/structure.py b/ekko/manifest/structure.py index 11a79a8..f2e82dc 100644 --- a/ekko/manifest/structure.py +++ b/ekko/manifest/structure.py @@ -18,10 +18,10 @@ from uuid import uuid4 as uuid class Metadata(object): - def __init__(self, incremental, sectors, segment_size=None, + def __init__(self, incremental, size_of_disk, segment_size=None, timestamp=None, backupsets=None, backupset_id=None): self.timestamp = timestamp if timestamp else time.time() - self.sectors = sectors + self.size_of_disk = size_of_disk self.incremental = incremental self.segment_size = 4 * 1024 ** 2 # 4MiB self.backupset_id = backupset_id if backupset_id else uuid().bytes diff --git a/setup.cfg b/setup.cfg index 9a2e59e..41c29bf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,6 +20,9 @@ classifier = Programming Language :: Python :: 3.4 [entry_points] +ekko.backup.backend = + raw = ekko.backup._backend.raw:RawBackup + qcow2 = ekko.backup._backend.qcow2:QCOW2Backup ekko.manifest.drivers = osdk = ekko.manifest._drivers.osdk:OSDKManifest sqlite = ekko.manifest._drivers.sqlite:SQLiteManifest diff --git a/tools/backup.py b/tools/backup.py old mode 100644 new mode 100755 index c1e1394..1cdc05c --- a/tools/backup.py +++ b/tools/backup.py @@ -17,78 +17,100 @@ # Copied and licensed from https://github.com/SamYaple/osdk +import argparse from hashlib import sha1 -from osdk import osdk -from uuid import uuid4 as uuid +import math +import os +import sys + +from ekko.manifest import structure as manifest_structure +from six.moves import range +from stevedore import driver -def get_disk_size(device): - with open(device, 'rb') 
as f: - return f.seek(0, 2) +def parse_args(): + parser = argparse.ArgumentParser(description='Backup Block Device') + parser.add_argument('--backup', required=True, + help='Path to backup file or device') + parser.add_argument('--manifest', required=True, + help='manifest file') + parser.add_argument('--cbt', required=False, + help='change block tracking info') + parser.add_argument('--backend', required=False, default='raw', + choices=['raw'], help='backend driver') + parser.add_argument('--driver', required=False, default='sqlite', + choices=['osdk', 'sqlite'], help='manifest driver') + return parser.parse_args() -def read_segments(f, lst, size, o): - zero_hash = sha1(bytes([0] * size)).hexdigest() +def read_segments(segments, metadata, backend): + size = metadata.segment_size + reads = [(segment * size, size) for segment in segments[:-1]] - for segment in lst: - f.seek(segment * size, 0) - data = f.read(size) - if not data: - raise Exception('Failed to read data') + # NOTE(SamYaple): If we are reading the last segment on the disk and the + # normal read size is greater than the disk, shrink the read size to the + # appropriate size. + if segments[-1] * size > metadata.size_of_disk: + reads.append((segments[-1] * size, metadata.size_of_disk % size)) + else: + reads.append((segments[-1] * size, size)) - sha1_hash = sha1(data) - if sha1_hash.hexdigest() != zero_hash: - meta = dict() - meta['incremental'] = o.metadata['incremental'] - meta['base'] = len(o.metadata['bases']) - 1 - meta['encryption'] = 0 - meta['compression'] = 0 - meta['sha1_hash'] = sha1_hash.digest() - o.segments[segment] = meta - else: - try: - del o.segments[segment] - except KeyError: - pass + # NOTE(SamYaple): One of the few optimizations we may ever need to do is + # the dropping of segments that are 100% full of zero bytes. 
This can + # potentially greatly reduce the size of a manifest but more importantantly + # it reduces the number of objects we will need to track + with open('/dev/zero', 'rb') as f: + zero_blob = f.read(size) + + for start, data in backend.get_data(reads): + if data == zero_blob: + continue + yield manifest_structure.Segment( + metadata.backupset_id, + metadata.incremental, + start / size, + 0, + 0, + sha1(data).digest() + ) + + +def check_manifest(manifest_file): + return os.path.isfile(manifest_file) def main(): - device = '/dev/loop0' - old_manifest = 'manifest0.osdk' - manifest = 'manifest0.osdk' - manifest = 'manifest1.osdk' - segment_size = 4 * 1024**2 # 4MiB - size_of_disk = get_disk_size(device) - num_of_sectors = int(size_of_disk / 512) - num_of_segments = int(size_of_disk / segment_size) + args = parse_args() + if check_manifest(args.manifest): + print('manifest exists; exiting') + return - o = osdk(manifest) - o.metadata['sectors'] = num_of_sectors + manifest = driver.DriverManager( + namespace='ekko.manifest.drivers', + name=args.driver, + invoke_on_load=True, + invoke_args=[args.manifest] + ).driver - new = True - new = False - existing = True - existing_full = True - existing_full = False + backend = driver.DriverManager( + namespace='ekko.backup.backend', + name=args.backend, + invoke_on_load=True, + invoke_args=[args.backup] + ).driver - if new: - o.metadata['incremental'] = 0 - o.metadata['segment_size'] = segment_size - o.metadata['bases'] = [uuid().bytes] - segments_to_read = range(0, num_of_segments - 1) - elif existing: - o.read_manifest(old_manifest) - o.metadata['incremental'] += 1 - segments_to_read = range(1, num_of_segments - 1) - elif existing_full: - o.read_manifest(old_manifest) - o.metadata['incremental'] += 1 - segments_to_read = range(0, num_of_segments - 1) + size_of_disk = backend.get_size() + incremental = 0 + metadata = manifest_structure.Metadata(incremental, size_of_disk) - with open(device, 'rb+') as f: - read_segments(f, 
segments_to_read, segment_size, o) + manifest.initialize() + manifest.put_metadata(metadata) - o.write_manifest() + segments_list = list(range(0, int(math.ceil( + float(size_of_disk)/metadata.segment_size)))) + segments = read_segments(segments_list, metadata, backend) + + manifest.put_segments(segments, metadata) if __name__ == '__main__': - main() + sys.exit(main()) diff --git a/tools/dump_manifest.py b/tools/dump_manifest.py new file mode 100755 index 0000000..40c2626 --- /dev/null +++ b/tools/dump_manifest.py @@ -0,0 +1,54 @@ +#!/usr/bin/python + +# Copyright 2016 Sam Yaple +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Copied and licensed from https://github.com/SamYaple/osdk + + +import argparse +import binascii +import os +import sys + +from stevedore import driver + + +def parse_args(): + parser = argparse.ArgumentParser(description='Backup Block Device') + parser.add_argument('--manifest', required=True, + help='manifest file') + parser.add_argument('--driver', required=False, default='sqlite', + choices=['osdk', 'sqlite'], help='manifest driver') + return parser.parse_args() + + +def check_manifest(manifest_file): + return os.path.isfile(manifest_file) + + +def main(): + args = parse_args() + manifest = driver.DriverManager( + namespace='ekko.manifest.drivers', + name=args.driver, + invoke_on_load=True, + invoke_args=[args.manifest] + ).driver + + for segment in manifest.get_segments(manifest.get_metadata()): + print(binascii.b2a_hex(segment.segment_hash)) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/tools/generate_manifest.py b/tools/generate_manifest.py index e949f1a..422f8bd 100755 --- a/tools/generate_manifest.py +++ b/tools/generate_manifest.py @@ -69,9 +69,8 @@ def main(): ).driver size_of_disk = args.backupsize * 1024**3 # Convert GB to B - num_of_sectors = int(size_of_disk / 512) incremental = 0 - metadata = manifest_structure.Metadata(incremental, sectors=num_of_sectors) + metadata = manifest_structure.Metadata(incremental, size_of_disk) manifest.initialize() manifest.put_metadata(metadata)