From a9d13fc32ff5a362b810ec070af7f265e8d80132 Mon Sep 17 00:00:00 2001 From: Michal Jastrzebski Date: Fri, 8 Jan 2016 11:09:14 -0600 Subject: [PATCH] Create manifest and implement sqlite driver Use class inheritance for driver in case we need something other than sqlite as time goes on. SQLite is only 10% larger than the custom format used with osdk. When compressed, the SQLite manifest is only fractionally larger than osdk. The time it takes to generate and process is about 2x longer. However, the benefit we get from using common constructs (sqlite) and tracking and changing the manifest schema is enough for me to think this is the right way to proceed. To generate a manifest with 2^24 objects (64TB disk) run: tools/generate_manifest.py --backupsize 64000 --manifest /pathtosave/manifest This will generate an 809MB file Co-Authored-By: Sam Yaple Change-Id: Ic431bfa52b6fcaeb1c6a64cf270cbb36c496335e --- .gitignore | 1 + ekko/manifest.py | 190 -------------------------------- ekko/manifest/__init__.py | 0 ekko/manifest/driver.py | 41 +++++++ ekko/manifest/sqlite.py | 86 +++++++++++++++ ekko/manifest/structure.py | 41 +++++++ requirements.txt | 1 + tools/generate_manifest-lite.py | 112 ------------------- tools/generate_manifest.py | 59 ++++------ 9 files changed, 189 insertions(+), 342 deletions(-) delete mode 100644 ekko/manifest.py create mode 100644 ekko/manifest/__init__.py create mode 100644 ekko/manifest/driver.py create mode 100644 ekko/manifest/sqlite.py create mode 100644 ekko/manifest/structure.py delete mode 100755 tools/generate_manifest-lite.py diff --git a/.gitignore b/.gitignore index 85c5879..a820604 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ .testrepository/ ekko.egg-info/ *.pyc +*.swp diff --git a/ekko/manifest.py b/ekko/manifest.py deleted file mode 100644 index 166dbba..0000000 --- a/ekko/manifest.py +++ /dev/null @@ -1,190 +0,0 @@ -#!/usr/bin/python - -# Copyright 2016 Sam Yaple -# -# Licensed under the Apache License, Version 2.0 (the 
"License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copied and licensed from https://github.com/SamYaple/osdk - -from binascii import crc32 -from collections import namedtuple -from datetime import datetime -from struct import pack -from struct import unpack -from uuid import UUID - -import six - -SIGNATURE = 'd326503ab5ca49adac56c89eb0b8ef08d326503ab5ca49adac56c89eb0b8ef08' - - -class EkkoShortReadError(Exception): - - def __init__(self, size_read, size_requested): - self.size_read = size_read - self.size_requested = size_requested - - -class EkkoManifestTooNewError(Exception): - pass - - -class EkkoChecksumError(Exception): - pass - - -class EkkoInvalidSignatureError(Exception): - pass - - -class Manifest(object): - - def __init__(self, manifest): - self.manifest = manifest - self.metadata = {'version': 0} - - def write_manifest(self): - with open(self.manifest, 'wb', 1) as f: - self.write_header(f) - self.write_body(f) - - def build_header(self): - data = pack( - '=1.6 six>=1.9.0 +oslo.utils>=3.2.0 # Apache-2.0 diff --git a/tools/generate_manifest-lite.py b/tools/generate_manifest-lite.py deleted file mode 100755 index e45216f..0000000 --- a/tools/generate_manifest-lite.py +++ /dev/null @@ -1,112 +0,0 @@ -#!/usr/bin/python - -# Copyright 2016 Sam Yaple -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Copied and licensed from https://github.com/SamYaple/osdk - - -import argparse -from collections import namedtuple -# from hashlib import sha1 -import os -import sys -from uuid import uuid4 as uuid - -from ekko import manifest -from six.moves import range - - -def parse_args(): - parser = argparse.ArgumentParser(description='Backup Block Device') - parser.add_argument('--backupsize', required=True, type=int, - help='Size of backup for manifest gen (size in GB)') - parser.add_argument('--manifest', required=True, - help='manifest file') - parser.add_argument('--cbt', required=False, - help='change block tracking info') - return parser.parse_args() - - -def read_segments(segments, size, backup): - backup.segments = dict() - backup.hashes = dict() - Segment = namedtuple( - 'Segment', - 'base incremental compression encryption' - ) - - for segment in segments: - # Generate manifest info for each object in backup - backup.segments[segment] = Segment( - len(backup.metadata['bases']) - 1, - backup.metadata['info'].incremental, - 0, - 0 - ) - # Random string simulating hash sha - backup.hashes[segment] = os.urandom(20) - - -def generate_mem_struct(segments, size, backup): - b = { - '96153320-980b-4b5e-958f-ea57812b280d': [] - } - - for seg in segments: - b['96153320-980b-4b5e-958f-ea57812b280d'].append({ - seg: backup.metadata['info'].incremental - }) - - return b - - -def check_manifest(manifest_file): - return os.path.isfile(manifest_file) - - -def main(): - args = parse_args() - segment_size = 4 * 1024**2 # 4MiB - size_of_disk = args.backupsize * 1024**3 # 
Convert GB to B - num_of_sectors = int(size_of_disk / 512) - num_of_segments = int(size_of_disk / segment_size) - incremental = 0 - - Info = namedtuple( - 'Info', - 'timestamp incremental segment_size sectors' - ) - - if check_manifest(args.manifest): - print('manifest exists; exiting') - return - - backup = manifest.Manifest(args.manifest) - - backup.metadata['info'] = Info( - manifest.utctimestamp(), - incremental, - segment_size, - num_of_sectors, - ) - - backup.metadata['bases'] = [uuid().bytes] - - # read_segments(range(0, num_of_segments - 1), segment_size, backup) - generate_mem_struct(range(0, num_of_segments - 1), segment_size, backup) - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/tools/generate_manifest.py b/tools/generate_manifest.py index 5a390ae..a0d7b70 100755 --- a/tools/generate_manifest.py +++ b/tools/generate_manifest.py @@ -18,14 +18,12 @@ import argparse -from collections import namedtuple -# from hashlib import sha1 import os import sys -from uuid import uuid4 as uuid sys.path.insert(0, '/root/ekko/') -from ekko import manifest +from ekko.manifest import driver as manifest_driver +from ekko.manifest import structure as manifest_structure from six.moves import range @@ -40,24 +38,16 @@ def parse_args(): return parser.parse_args() -def read_segments(segments, size, backup): - backup.segments = dict() - backup.hashes = dict() - Segment = namedtuple( - 'Segment', - 'base incremental compression encryption' - ) - +def read_segments(segments, metadata): for segment in segments: - # Generate manifest info for each object in backup - backup.segments[segment] = Segment( - len(backup.metadata['bases']) - 1, - backup.metadata['info'].incremental, + yield manifest_structure.Segment( + metadata.backupset_id, + metadata.incremental, + segment, 0, - 0 + 0, + os.urandom(20) ) - # Random string simulating hash sha - backup.hashes[segment] = os.urandom(20) def check_manifest(manifest_file): @@ -66,35 +56,24 @@ def check_manifest(manifest_file): 
def main(): args = parse_args() - segment_size = 4 * 1024**2 # 4MiB - size_of_disk = args.backupsize * 1024**3 # Convert GB to B - num_of_sectors = int(size_of_disk / 512) - num_of_segments = int(size_of_disk / segment_size) - incremental = 0 - - Info = namedtuple( - 'Info', - 'timestamp incremental segment_size sectors' - ) - if check_manifest(args.manifest): print('manifest exists; exiting') return - backup = manifest.Manifest(args.manifest) + manifest = manifest_driver.load_manifest_driver(args.manifest) - backup.metadata['info'] = Info( - manifest.utctimestamp(), - incremental, - segment_size, - num_of_sectors, - ) + size_of_disk = args.backupsize * 1024**3 # Convert GB to B + num_of_sectors = int(size_of_disk / 512) + incremental = 0 + metadata = manifest_structure.Metadata(incremental, sectors=num_of_sectors) - backup.metadata['bases'] = [uuid().bytes] + manifest.initialize() + manifest.put_metadata(metadata) - read_segments(range(0, num_of_segments - 1), segment_size, backup) + num_of_segments = int(size_of_disk / metadata.segment_size) + segments = read_segments(range(0, num_of_segments - 1), metadata) - backup.write_manifest() + manifest.put_segments(segments) if __name__ == '__main__': sys.exit(main())