Merge "Add initial backup code"

This commit is contained in:
Jenkins 2016-01-18 20:14:21 +00:00 committed by Gerrit Code Review
commit 3e78319bc0
13 changed files with 226 additions and 67 deletions

0
ekko/backup/__init__.py Normal file
View File

View File

View File

@ -0,0 +1,29 @@
# Copyright 2016 Sam Yaple
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ekko.backup import backend
class RawBackup(backend.BaseBackup):
    """Backup driver that reads directly from ``self.backup_location``."""

    def get_data(self, reads):
        """Lazily read the requested ranges from the backup location.

        :param reads: An iterable of ``(start, size)`` tuples; ``start`` is
                      used as an absolute seek position and ``size`` is the
                      amount requested from ``read``
        :returns: A generator yielding one ``(start, data)`` tuple per read
        """
        with open(self.backup_location, 'rb') as source:
            for offset, length in reads:
                # whence=0: position is absolute from the start of the file
                source.seek(offset, 0)
                chunk = source.read(length)
                yield (offset, chunk)

    def get_size(self):
        """Return the size of the backup location.

        :returns: The position reported after seeking to the end of the file
        """
        with open(self.backup_location, 'rb') as source:
            # whence=2: position is relative to the end of the file
            source.seek(0, 2)
            end_position = source.tell()
        return end_position

View File

View File

45
ekko/backup/backend.py Normal file
View File

@ -0,0 +1,45 @@
# Copyright 2016 Sam Yaple
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import six
@six.add_metaclass(abc.ABCMeta)
class BaseBackup(object):
    """Abstract interface that every backup driver implements.

    :param backup_location: Location of device or file to backup
    """

    def __init__(self, backup_location):
        self.backup_location = backup_location

    @abc.abstractmethod
    def get_data(self, reads):
        """Get data from the backing device or file.

        :param reads: A list of ``(start, size)`` tuples describing where
                      each read begins and how much to read
        :returns: An iterable of ``(start, data)`` tuples
        """
        raise NotImplementedError

    @abc.abstractmethod
    def get_size(self):
        """Get the size of the disk to backup.

        :returns: Size of disk in bytes
        """
        raise NotImplementedError

View File

@ -48,7 +48,7 @@ class OSDKManifest(drivers.BaseManifest):
'<2IQ24s',
metadata.incremental,
metadata.segment_size,
metadata.sectors,
metadata.size_of_disk,
metadata.timestamp
))
for backupset in metadata.backupsets:

View File

@ -69,19 +69,26 @@ class SQLiteManifest(drivers.BaseManifest):
return structure.Metadata(
incremental=metadata['incremental'],
sectors=metadata['sectors'],
size_of_disk=metadata['size_of_disk'],
segment_size=metadata['segment_size'],
timestamp=metadata['timestamp'],
backupset_id=backupsets[-1],
backupsets=backupsets
)
def get_segments(self):
def get_segments(self, metadata):
with self.get_conn() as conn:
with closing(conn.cursor()) as cur:
cur.execute("SELECT * FROM segments")
for result in cur:
yield result
yield structure.Segment(
backupset_id=metadata.backupsets[result[5]],
incremental=result[0],
segment=result[1],
compression=result[2],
encryption=result[3],
segment_hash=str(result[4])
)
def put_segments(self, segments, metadata):
with self.get_conn() as conn:
@ -108,7 +115,7 @@ class SQLiteManifest(drivers.BaseManifest):
[
('incremental', metadata.incremental),
('segment_size', metadata.segment_size),
('sectors', metadata.sectors),
('size_of_disk', metadata.size_of_disk),
('timestamp', metadata.timestamp)
]
)

View File

@ -18,10 +18,10 @@ from uuid import uuid4 as uuid
class Metadata(object):
def __init__(self, incremental, sectors, segment_size=None,
def __init__(self, incremental, size_of_disk, segment_size=None,
timestamp=None, backupsets=None, backupset_id=None):
self.timestamp = timestamp if timestamp else time.time()
self.sectors = sectors
self.size_of_disk = size_of_disk
self.incremental = incremental
self.segment_size = 4 * 1024 ** 2 # 4MiB
self.backupset_id = backupset_id if backupset_id else uuid().bytes

View File

@ -20,6 +20,9 @@ classifier =
Programming Language :: Python :: 3.4
[entry_points]
ekko.backup.backend =
raw = ekko.backup._backend.raw:RawBackup
qcow2 = ekko.backup._backend.qcow2:QCOW2Backup
ekko.manifest.drivers =
osdk = ekko.manifest._drivers.osdk:OSDKManifest
sqlite = ekko.manifest._drivers.sqlite:SQLiteManifest

138
tools/backup.py Normal file → Executable file
View File

@ -17,78 +17,100 @@
# Copied and licensed from https://github.com/SamYaple/osdk
import argparse
from hashlib import sha1
from osdk import osdk
from uuid import uuid4 as uuid
import math
import os
import sys
from ekko.manifest import structure as manifest_structure
from six.moves import range
from stevedore import driver
def get_disk_size(device):
    """Return the size in bytes of a file or block device.

    :param device: Path to the file or device to measure
    :returns: The end-of-file position, i.e. the size in bytes
    """
    with open(device, 'rb') as f:
        # Seek to the end (whence=2) and read the position back with tell().
        # The return value of seek() is not portable: Python 2 file objects
        # return None from seek(), so it must not be used as the size.
        f.seek(0, 2)
        return f.tell()
def parse_args(argv=None):
    """Parse the command-line options of the backup tool.

    :param argv: Optional list of argument strings to parse. When omitted
                 (``None``) argparse falls back to ``sys.argv[1:]``, which
                 keeps existing zero-argument callers working
    :returns: An ``argparse.Namespace`` with the attributes ``backup``,
              ``manifest``, ``cbt``, ``backend`` and ``driver``
    """
    parser = argparse.ArgumentParser(description='Backup Block Device')
    parser.add_argument('--backup', required=True,
                        help='Path to backup file or device')
    parser.add_argument('--manifest', required=True,
                        help='manifest file')
    parser.add_argument('--cbt', required=False,
                        help='change block tracking info')
    parser.add_argument('--backend', required=False, default='raw',
                        choices=['raw'], help='backend driver')
    parser.add_argument('--driver', required=False, default='sqlite',
                        choices=['osdk', 'sqlite'], help='manifest driver')
    return parser.parse_args(argv)
def read_segments(f, lst, size, o):
zero_hash = sha1(bytes([0] * size)).hexdigest()
def read_segments(segments, metadata, backend):
size = metadata.segment_size
reads = [(segment * size, size) for segment in segments[:-1]]
for segment in lst:
f.seek(segment * size, 0)
data = f.read(size)
if not data:
raise Exception('Failed to read data')
# NOTE(SamYaple): If we are reading the last segment on the disk and the
# normal read size is greater than the disk, shrink the read size to the
# appropriate size.
if segments[-1] * size > metadata.size_of_disk:
reads.append((segments[-1] * size, metadata.size_of_disk % size))
else:
reads.append((segments[-1] * size, size))
sha1_hash = sha1(data)
if sha1_hash.hexdigest() != zero_hash:
meta = dict()
meta['incremental'] = o.metadata['incremental']
meta['base'] = len(o.metadata['bases']) - 1
meta['encryption'] = 0
meta['compression'] = 0
meta['sha1_hash'] = sha1_hash.digest()
o.segments[segment] = meta
else:
try:
del o.segments[segment]
except KeyError:
pass
# NOTE(SamYaple): One of the few optimizations we may ever need to do is
# the dropping of segments that are 100% full of zero bytes. This can
# potentially greatly reduce the size of a manifest but more importantly
# it reduces the number of objects we will need to track
with open('/dev/zero', 'rb') as f:
zero_blob = f.read(size)
for start, data in backend.get_data(reads):
if data == zero_blob:
continue
yield manifest_structure.Segment(
metadata.backupset_id,
metadata.incremental,
start / size,
0,
0,
sha1(data).digest()
)
def check_manifest(manifest_file):
    """Tell whether ``manifest_file`` is the path of an existing regular file.

    :param manifest_file: Path of the manifest to look for
    :returns: The result of ``os.path.isfile`` for that path
    """
    is_regular_file = os.path.isfile(manifest_file)
    return is_regular_file
def main():
device = '/dev/loop0'
old_manifest = 'manifest0.osdk'
manifest = 'manifest0.osdk'
manifest = 'manifest1.osdk'
segment_size = 4 * 1024**2 # 4MiB
size_of_disk = get_disk_size(device)
num_of_sectors = int(size_of_disk / 512)
num_of_segments = int(size_of_disk / segment_size)
args = parse_args()
if check_manifest(args.manifest):
print('manifest exists; exiting')
return
o = osdk(manifest)
o.metadata['sectors'] = num_of_sectors
manifest = driver.DriverManager(
namespace='ekko.manifest.drivers',
name=args.driver,
invoke_on_load=True,
invoke_args=[args.manifest]
).driver
new = True
new = False
existing = True
existing_full = True
existing_full = False
backend = driver.DriverManager(
namespace='ekko.backup.backend',
name=args.backend,
invoke_on_load=True,
invoke_args=[args.backup]
).driver
if new:
o.metadata['incremental'] = 0
o.metadata['segment_size'] = segment_size
o.metadata['bases'] = [uuid().bytes]
segments_to_read = range(0, num_of_segments - 1)
elif existing:
o.read_manifest(old_manifest)
o.metadata['incremental'] += 1
segments_to_read = range(1, num_of_segments - 1)
elif existing_full:
o.read_manifest(old_manifest)
o.metadata['incremental'] += 1
segments_to_read = range(0, num_of_segments - 1)
size_of_disk = backend.get_size()
incremental = 0
metadata = manifest_structure.Metadata(incremental, size_of_disk)
with open(device, 'rb+') as f:
read_segments(f, segments_to_read, segment_size, o)
manifest.initialize()
manifest.put_metadata(metadata)
o.write_manifest()
segments_list = list(range(0, int(math.ceil(
float(size_of_disk)/metadata.segment_size))))
segments = read_segments(segments_list, metadata, backend)
manifest.put_segments(segments, metadata)
if __name__ == '__main__':
main()
sys.exit(main())

54
tools/dump_manifest.py Executable file
View File

@ -0,0 +1,54 @@
#!/usr/bin/python
# Copyright 2016 Sam Yaple
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copied and licensed from https://github.com/SamYaple/osdk
import argparse
import binascii
import os
import sys
from stevedore import driver
def parse_args(argv=None):
    """Parse the command-line options of the manifest dump tool.

    :param argv: Optional list of argument strings to parse. When omitted
                 (``None``) argparse falls back to ``sys.argv[1:]``, which
                 keeps existing zero-argument callers working
    :returns: An ``argparse.Namespace`` with the attributes ``manifest``
              and ``driver``
    """
    # The description previously read 'Backup Block Device', carried over
    # from the backup tool; this script only prints the segment hashes that
    # are stored in a manifest.
    parser = argparse.ArgumentParser(
        description='Dump segment hashes from a manifest')
    parser.add_argument('--manifest', required=True,
                        help='manifest file')
    parser.add_argument('--driver', required=False, default='sqlite',
                        choices=['osdk', 'sqlite'], help='manifest driver')
    return parser.parse_args(argv)
def check_manifest(manifest_file):
    """Tell whether ``manifest_file`` is the path of an existing regular file.

    :param manifest_file: Path of the manifest to look for
    :returns: The result of ``os.path.isfile`` for that path
    """
    is_regular_file = os.path.isfile(manifest_file)
    return is_regular_file
def main():
    """Print the hex-encoded hash of every segment in a manifest.

    The manifest driver named by ``--driver`` is loaded through stevedore
    from the ``ekko.manifest.drivers`` namespace and constructed with the
    ``--manifest`` path as its only argument.
    """
    options = parse_args()

    manager = driver.DriverManager(
        namespace='ekko.manifest.drivers',
        name=options.driver,
        invoke_on_load=True,
        invoke_args=[options.manifest]
    )
    manifest = manager.driver

    # The metadata is fetched first because get_segments() takes it as input.
    metadata = manifest.get_metadata()
    for segment in manifest.get_segments(metadata):
        hex_hash = binascii.b2a_hex(segment.segment_hash)
        print(hex_hash)
# Run main() only when this file is executed as a script, and pass its
# return value to sys.exit() as the process exit status.
if __name__ == '__main__':
    sys.exit(main())

View File

@ -69,9 +69,8 @@ def main():
).driver
size_of_disk = args.backupsize * 1024**3 # Convert GB to B
num_of_sectors = int(size_of_disk / 512)
incremental = 0
metadata = manifest_structure.Metadata(incremental, sectors=num_of_sectors)
metadata = manifest_structure.Metadata(incremental, size_of_disk)
manifest.initialize()
manifest.put_metadata(metadata)