diff --git a/ekko/backup/__init__.py b/ekko/backup/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ekko/backup/_backend/__init__.py b/ekko/backup/_backend/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ekko/backup/_backend/raw.py b/ekko/backup/_backend/raw.py new file mode 100644 index 0000000..78daa79 --- /dev/null +++ b/ekko/backup/_backend/raw.py @@ -0,0 +1,29 @@ +# Copyright 2016 Sam Yaple +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ekko.backup import backend + + +class RawBackup(backend.BaseBackup): + + def get_data(self, reads): + with open(self.backup_location, 'rb') as f: + for start, size in reads: + f.seek(start, 0) + yield (start, f.read(size)) + + def get_size(self): + with open(self.backup_location, 'rb') as f: + f.seek(0, 2) + return f.tell() diff --git a/ekko/backup/_compression/__init__.py b/ekko/backup/_compression/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ekko/backup/_encryption/__init__.py b/ekko/backup/_encryption/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ekko/backup/backend.py b/ekko/backup/backend.py new file mode 100644 index 0000000..467d00e --- /dev/null +++ b/ekko/backup/backend.py @@ -0,0 +1,45 @@ +# Copyright 2016 Sam Yaple +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import abc + +import six + + +@six.add_metaclass(abc.ABCMeta) +class BaseBackup(object): + """Base class for Backup drivers + + :params backup_location: Location of device or file to backup + """ + + def __init__(self, backup_location): + self.backup_location = backup_location + + @abc.abstractmethod + def get_data(self, reads): + """Get data from backing device or file + + :params reads: A list of tuples with the start sector and size of read + :returns: An iterable of tuples with start sector and data + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_size(self): + """Get size of disk to backup + + :returns: Size of disk in bytes + """ + raise NotImplementedError() diff --git a/ekko/manifest/_drivers/osdk.py b/ekko/manifest/_drivers/osdk.py index d2af6ad..f14a16b 100644 --- a/ekko/manifest/_drivers/osdk.py +++ b/ekko/manifest/_drivers/osdk.py @@ -48,7 +48,7 @@ class OSDKManifest(drivers.BaseManifest): '<2IQ24s', metadata.incremental, metadata.segment_size, - metadata.sectors, + metadata.size_of_disk, metadata.timestamp )) for backupset in metadata.backupsets: diff --git a/ekko/manifest/_drivers/sqlite.py b/ekko/manifest/_drivers/sqlite.py index 80530ef..de303ea 100644 --- a/ekko/manifest/_drivers/sqlite.py +++ b/ekko/manifest/_drivers/sqlite.py @@ -69,19 +69,26 @@ class SQLiteManifest(drivers.BaseManifest): return structure.Metadata( incremental=metadata['incremental'], - sectors=metadata['sectors'], + size_of_disk=metadata['size_of_disk'], segment_size=metadata['segment_size'], timestamp=metadata['timestamp'], 
backupset_id=backupsets[-1], backupsets=backupsets ) - def get_segments(self): + def get_segments(self, metadata): with self.get_conn() as conn: with closing(conn.cursor()) as cur: cur.execute("SELECT * FROM segments") for result in cur: - yield result + yield structure.Segment( + backupset_id=metadata.backupsets[result[5]], + incremental=result[0], + segment=result[1], + compression=result[2], + encryption=result[3], + segment_hash=str(result[4]) + ) def put_segments(self, segments, metadata): with self.get_conn() as conn: @@ -108,7 +115,7 @@ class SQLiteManifest(drivers.BaseManifest): [ ('incremental', metadata.incremental), ('segment_size', metadata.segment_size), - ('sectors', metadata.sectors), + ('size_of_disk', metadata.size_of_disk), ('timestamp', metadata.timestamp) ] ) diff --git a/ekko/manifest/structure.py b/ekko/manifest/structure.py index 11a79a8..f2e82dc 100644 --- a/ekko/manifest/structure.py +++ b/ekko/manifest/structure.py @@ -18,10 +18,10 @@ from uuid import uuid4 as uuid class Metadata(object): - def __init__(self, incremental, sectors, segment_size=None, + def __init__(self, incremental, size_of_disk, segment_size=None, timestamp=None, backupsets=None, backupset_id=None): self.timestamp = timestamp if timestamp else time.time() - self.sectors = sectors + self.size_of_disk = size_of_disk self.incremental = incremental self.segment_size = 4 * 1024 ** 2 # 4MiB self.backupset_id = backupset_id if backupset_id else uuid().bytes diff --git a/setup.cfg b/setup.cfg index 9a2e59e..41c29bf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,6 +20,9 @@ classifier = Programming Language :: Python :: 3.4 [entry_points] +ekko.backup.backend = + raw = ekko.backup._backend.raw:RawBackup + qcow2 = ekko.backup._backend.qcow2:QCOW2Backup ekko.manifest.drivers = osdk = ekko.manifest._drivers.osdk:OSDKManifest sqlite = ekko.manifest._drivers.sqlite:SQLiteManifest diff --git a/tools/backup.py b/tools/backup.py old mode 100644 new mode 100755 index c1e1394..1cdc05c --- 
a/tools/backup.py +++ b/tools/backup.py @@ -17,78 +17,100 @@ # Copied and licensed from https://github.com/SamYaple/osdk +import argparse from hashlib import sha1 -from osdk import osdk -from uuid import uuid4 as uuid +import math +import os +import sys + +from ekko.manifest import structure as manifest_structure +from six.moves import range +from stevedore import driver -def get_disk_size(device): - with open(device, 'rb') as f: - return f.seek(0, 2) +def parse_args(): + parser = argparse.ArgumentParser(description='Backup Block Device') + parser.add_argument('--backup', required=True, + help='Path to backup file or device') + parser.add_argument('--manifest', required=True, + help='manifest file') + parser.add_argument('--cbt', required=False, + help='change block tracking info') + parser.add_argument('--backend', required=False, default='raw', + choices=['raw'], help='backend driver') + parser.add_argument('--driver', required=False, default='sqlite', + choices=['osdk', 'sqlite'], help='manifest driver') + return parser.parse_args() -def read_segments(f, lst, size, o): - zero_hash = sha1(bytes([0] * size)).hexdigest() +def read_segments(segments, metadata, backend): + size = metadata.segment_size + reads = [(segment * size, size) for segment in segments[:-1]] - for segment in lst: - f.seek(segment * size, 0) - data = f.read(size) - if not data: - raise Exception('Failed to read data') + # NOTE(SamYaple): If we are reading the last segment on the disk and the + # normal read size is greater than the disk, shrink the read size to the + # appropriate size. 
+ if segments[-1] * size > metadata.size_of_disk: + reads.append((segments[-1] * size, metadata.size_of_disk % size)) + else: + reads.append((segments[-1] * size, size)) - sha1_hash = sha1(data) - if sha1_hash.hexdigest() != zero_hash: - meta = dict() - meta['incremental'] = o.metadata['incremental'] - meta['base'] = len(o.metadata['bases']) - 1 - meta['encryption'] = 0 - meta['compression'] = 0 - meta['sha1_hash'] = sha1_hash.digest() - o.segments[segment] = meta - else: - try: - del o.segments[segment] - except KeyError: - pass + # NOTE(SamYaple): One of the few optimizations we may ever need to do is + # the dropping of segments that are 100% full of zero bytes. This can + # potentially greatly reduce the size of a manifest but more importantly + # it reduces the number of objects we will need to track + with open('/dev/zero', 'rb') as f: + zero_blob = f.read(size) + + for start, data in backend.get_data(reads): + if data == zero_blob: + continue + yield manifest_structure.Segment( + metadata.backupset_id, + metadata.incremental, + start / size, + 0, + 0, + sha1(data).digest() + ) + + +def check_manifest(manifest_file): + return os.path.isfile(manifest_file) def main(): - device = '/dev/loop0' - old_manifest = 'manifest0.osdk' - manifest = 'manifest0.osdk' - manifest = 'manifest1.osdk' - segment_size = 4 * 1024**2 # 4MiB - size_of_disk = get_disk_size(device) - num_of_sectors = int(size_of_disk / 512) - num_of_segments = int(size_of_disk / segment_size) + args = parse_args() + if check_manifest(args.manifest): + print('manifest exists; exiting') + return - o = osdk(manifest) - o.metadata['sectors'] = num_of_sectors + manifest = driver.DriverManager( + namespace='ekko.manifest.drivers', + name=args.driver, + invoke_on_load=True, + invoke_args=[args.manifest] + ).driver - new = True - new = False - existing = True - existing_full = True - existing_full = False + backend = driver.DriverManager( + namespace='ekko.backup.backend', + name=args.backend, + 
invoke_on_load=True, + invoke_args=[args.backup] + ).driver - if new: - o.metadata['incremental'] = 0 - o.metadata['segment_size'] = segment_size - o.metadata['bases'] = [uuid().bytes] - segments_to_read = range(0, num_of_segments - 1) - elif existing: - o.read_manifest(old_manifest) - o.metadata['incremental'] += 1 - segments_to_read = range(1, num_of_segments - 1) - elif existing_full: - o.read_manifest(old_manifest) - o.metadata['incremental'] += 1 - segments_to_read = range(0, num_of_segments - 1) + size_of_disk = backend.get_size() + incremental = 0 + metadata = manifest_structure.Metadata(incremental, size_of_disk) - with open(device, 'rb+') as f: - read_segments(f, segments_to_read, segment_size, o) + manifest.initialize() + manifest.put_metadata(metadata) - o.write_manifest() + segments_list = list(range(0, int(math.ceil( + float(size_of_disk)/metadata.segment_size)))) + segments = read_segments(segments_list, metadata, backend) + + manifest.put_segments(segments, metadata) if __name__ == '__main__': - main() + sys.exit(main()) diff --git a/tools/dump_manifest.py b/tools/dump_manifest.py new file mode 100755 index 0000000..40c2626 --- /dev/null +++ b/tools/dump_manifest.py @@ -0,0 +1,54 @@ +#!/usr/bin/python + +# Copyright 2016 Sam Yaple +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Copied and licensed from https://github.com/SamYaple/osdk + + +import argparse +import binascii +import os +import sys + +from stevedore import driver + + +def parse_args(): + parser = argparse.ArgumentParser(description='Backup Block Device') + parser.add_argument('--manifest', required=True, + help='manifest file') + parser.add_argument('--driver', required=False, default='sqlite', + choices=['osdk', 'sqlite'], help='manifest driver') + return parser.parse_args() + + +def check_manifest(manifest_file): + return os.path.isfile(manifest_file) + + +def main(): + args = parse_args() + manifest = driver.DriverManager( + namespace='ekko.manifest.drivers', + name=args.driver, + invoke_on_load=True, + invoke_args=[args.manifest] + ).driver + + for segment in manifest.get_segments(manifest.get_metadata()): + print(binascii.b2a_hex(segment.segment_hash)) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/tools/generate_manifest.py b/tools/generate_manifest.py index e949f1a..422f8bd 100755 --- a/tools/generate_manifest.py +++ b/tools/generate_manifest.py @@ -69,9 +69,8 @@ def main(): ).driver size_of_disk = args.backupsize * 1024**3 # Convert GB to B - num_of_sectors = int(size_of_disk / 512) incremental = 0 - metadata = manifest_structure.Metadata(incremental, sectors=num_of_sectors) + metadata = manifest_structure.Metadata(incremental, size_of_disk) manifest.initialize() manifest.put_metadata(metadata)