Add initial backup code

Updates backup.py and adds dump_manifest.py that will print out all
hashes from the underlying blocks. This can be replicated with a
simple python script as follows:

    # dump_disk.py
    import hashlib
    with open('/dev/loop0', 'rb') as f:
        while True:
            data = f.read(4*1024**2)
            if not data:
                break
            print(hashlib.sha1(data).hexdigest())

    python dump_disk.py | grep -v 2bccbd2f38f15c13eb7d5a89fd9d85f595e23bc3

NOTE: 2bccbd2f38f15c13eb7d5a89fd9d85f595e23bc3 is the SHA-1 hash of a segment of all zeros

To perform backup and dump info from manifest (which will match
the output of the above script):

    tools/backup.py --backup /dev/loop0 --manifest mani
    tools/dump_manifest.py --manifest mani

The backup works on both files and block devices with the raw driver.
Further drivers will be implemented (such as qcow2 and rbd) as time
goes on.

Replace the sector count stored in the metadata with the size of the disk in bytes.

Change-Id: I976f02a27bc13b5774a6088799ca61f65ec04f14
This commit is contained in:
SamYaple 2016-01-15 23:01:37 +00:00
parent 51bfcd893e
commit 401c05413c
13 changed files with 226 additions and 67 deletions

0
ekko/backup/__init__.py Normal file
View File

View File

View File

@ -0,0 +1,29 @@
# Copyright 2016 Sam Yaple
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ekko.backup import backend
class RawBackup(backend.BaseBackup):
    """Backup driver that reads straight from a raw file or block device."""

    def get_data(self, reads):
        """Yield the data found at each requested location.

        :param reads: An iterable of (start, size) tuples; ``start`` is used
                      as an absolute seek offset and ``size`` is the number
                      of bytes requested from the read
        :returns: A generator of (start, data) tuples, one per request
        """
        with open(self.backup_location, 'rb') as source:
            for offset, length in reads:
                # Whence 0 == absolute position from the start of the file
                source.seek(offset, 0)
                chunk = source.read(length)
                yield (offset, chunk)

    def get_size(self):
        """Return the end offset of the backing file or device.

        :returns: Position reported after seeking to the end of the file
        """
        with open(self.backup_location, 'rb') as source:
            # Whence 2 == position relative to the end of the file
            source.seek(0, 2)
            end_offset = source.tell()
        return end_offset

View File

View File

45
ekko/backup/backend.py Normal file
View File

@ -0,0 +1,45 @@
# Copyright 2016 Sam Yaple
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import six
@six.add_metaclass(abc.ABCMeta)
class BaseBackup(object):
    """Abstract interface implemented by every backup driver.

    :param backup_location: Location of the device or file to back up
    """

    def __init__(self, backup_location):
        self.backup_location = backup_location

    @abc.abstractmethod
    def get_data(self, reads):
        """Read the requested ranges from the backing device or file.

        :param reads: A list of (start, size) tuples, one per read
        :returns: An iterable of (start, data) tuples
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def get_size(self):
        """Report how large the disk to back up is.

        :returns: Size of the disk in bytes
        """
        raise NotImplementedError()

View File

@ -48,7 +48,7 @@ class OSDKManifest(drivers.BaseManifest):
'<2IQ24s',
metadata.incremental,
metadata.segment_size,
metadata.sectors,
metadata.size_of_disk,
metadata.timestamp
))
for backupset in metadata.backupsets:

View File

@ -69,19 +69,26 @@ class SQLiteManifest(drivers.BaseManifest):
return structure.Metadata(
incremental=metadata['incremental'],
sectors=metadata['sectors'],
size_of_disk=metadata['size_of_disk'],
segment_size=metadata['segment_size'],
timestamp=metadata['timestamp'],
backupset_id=backupsets[-1],
backupsets=backupsets
)
def get_segments(self):
def get_segments(self, metadata):
with self.get_conn() as conn:
with closing(conn.cursor()) as cur:
cur.execute("SELECT * FROM segments")
for result in cur:
yield result
yield structure.Segment(
backupset_id=metadata.backupsets[result[5]],
incremental=result[0],
segment=result[1],
compression=result[2],
encryption=result[3],
segment_hash=str(result[4])
)
def put_segments(self, segments, metadata):
with self.get_conn() as conn:
@ -108,7 +115,7 @@ class SQLiteManifest(drivers.BaseManifest):
[
('incremental', metadata.incremental),
('segment_size', metadata.segment_size),
('sectors', metadata.sectors),
('size_of_disk', metadata.size_of_disk),
('timestamp', metadata.timestamp)
]
)

View File

@ -18,10 +18,10 @@ from uuid import uuid4 as uuid
class Metadata(object):
def __init__(self, incremental, sectors, segment_size=None,
def __init__(self, incremental, size_of_disk, segment_size=None,
timestamp=None, backupsets=None, backupset_id=None):
self.timestamp = timestamp if timestamp else time.time()
self.sectors = sectors
self.size_of_disk = size_of_disk
self.incremental = incremental
self.segment_size = 4 * 1024 ** 2 # 4MiB
self.backupset_id = backupset_id if backupset_id else uuid().bytes

View File

@ -20,6 +20,9 @@ classifier =
Programming Language :: Python :: 3.4
[entry_points]
ekko.backup.backend =
raw = ekko.backup._backend.raw:RawBackup
qcow2 = ekko.backup._backend.qcow2:QCOW2Backup
ekko.manifest.drivers =
osdk = ekko.manifest._drivers.osdk:OSDKManifest
sqlite = ekko.manifest._drivers.sqlite:SQLiteManifest

138
tools/backup.py Normal file → Executable file
View File

@ -17,78 +17,100 @@
# Copied and licensed from https://github.com/SamYaple/osdk
import argparse
from hashlib import sha1
from osdk import osdk
from uuid import uuid4 as uuid
import math
import os
import sys
from ekko.manifest import structure as manifest_structure
from six.moves import range
from stevedore import driver
def get_disk_size(device):
    """Return the size in bytes of a file or block device.

    :param device: Path to the file or block device to measure
    :returns: Offset of the end of the device, in bytes
    """
    with open(device, 'rb') as f:
        # NOTE: On Python 2 file.seek() returns None, so the end offset
        # must be read back with tell(); this form works on both 2 and 3.
        f.seek(0, 2)
        return f.tell()
def parse_args():
    """Parse the command line options of the backup tool.

    :returns: Namespace holding ``backup``, ``manifest``, ``cbt``,
              ``backend`` and ``driver``
    """
    # Options are declared as data and registered in order so the help
    # output lists them in the same sequence as they appear here.
    options = (
        ('--backup', dict(required=True,
                          help='Path to backup file or device')),
        ('--manifest', dict(required=True,
                            help='manifest file')),
        ('--cbt', dict(required=False,
                       help='change block tracking info')),
        ('--backend', dict(required=False, default='raw',
                           choices=['raw'], help='backend driver')),
        ('--driver', dict(required=False, default='sqlite',
                          choices=['osdk', 'sqlite'],
                          help='manifest driver')),
    )

    parser = argparse.ArgumentParser(description='Backup Block Device')
    for flag, settings in options:
        parser.add_argument(flag, **settings)
    return parser.parse_args()
def read_segments(f, lst, size, o):
zero_hash = sha1(bytes([0] * size)).hexdigest()
def read_segments(segments, metadata, backend):
size = metadata.segment_size
reads = [(segment * size, size) for segment in segments[:-1]]
for segment in lst:
f.seek(segment * size, 0)
data = f.read(size)
if not data:
raise Exception('Failed to read data')
# NOTE(SamYaple): If we are reading the last segment on the disk and the
# normal read size would run past the end of the disk, shrink the read size
# to the appropriate size.
if segments[-1] * size > metadata.size_of_disk:
reads.append((segments[-1] * size, metadata.size_of_disk % size))
else:
reads.append((segments[-1] * size, size))
sha1_hash = sha1(data)
if sha1_hash.hexdigest() != zero_hash:
meta = dict()
meta['incremental'] = o.metadata['incremental']
meta['base'] = len(o.metadata['bases']) - 1
meta['encryption'] = 0
meta['compression'] = 0
meta['sha1_hash'] = sha1_hash.digest()
o.segments[segment] = meta
else:
try:
del o.segments[segment]
except KeyError:
pass
# NOTE(SamYaple): One of the few optimizations we may ever need to do is
# the dropping of segments that are 100% full of zero bytes. This can
# potentially greatly reduce the size of a manifest but, more importantly,
# it reduces the number of objects we will need to track.
with open('/dev/zero', 'rb') as f:
zero_blob = f.read(size)
for start, data in backend.get_data(reads):
if data == zero_blob:
continue
yield manifest_structure.Segment(
metadata.backupset_id,
metadata.incremental,
start / size,
0,
0,
sha1(data).digest()
)
def check_manifest(manifest_file):
    """Report whether a manifest already exists at the given path.

    :param manifest_file: Path of the manifest to look for
    :returns: True if the path is an existing regular file, else False
    """
    manifest_exists = os.path.isfile(manifest_file)
    return manifest_exists
def main():
device = '/dev/loop0'
old_manifest = 'manifest0.osdk'
manifest = 'manifest0.osdk'
manifest = 'manifest1.osdk'
segment_size = 4 * 1024**2 # 4MiB
size_of_disk = get_disk_size(device)
num_of_sectors = int(size_of_disk / 512)
num_of_segments = int(size_of_disk / segment_size)
args = parse_args()
if check_manifest(args.manifest):
print('manifest exists; exiting')
return
o = osdk(manifest)
o.metadata['sectors'] = num_of_sectors
manifest = driver.DriverManager(
namespace='ekko.manifest.drivers',
name=args.driver,
invoke_on_load=True,
invoke_args=[args.manifest]
).driver
new = True
new = False
existing = True
existing_full = True
existing_full = False
backend = driver.DriverManager(
namespace='ekko.backup.backend',
name=args.backend,
invoke_on_load=True,
invoke_args=[args.backup]
).driver
if new:
o.metadata['incremental'] = 0
o.metadata['segment_size'] = segment_size
o.metadata['bases'] = [uuid().bytes]
segments_to_read = range(0, num_of_segments - 1)
elif existing:
o.read_manifest(old_manifest)
o.metadata['incremental'] += 1
segments_to_read = range(1, num_of_segments - 1)
elif existing_full:
o.read_manifest(old_manifest)
o.metadata['incremental'] += 1
segments_to_read = range(0, num_of_segments - 1)
size_of_disk = backend.get_size()
incremental = 0
metadata = manifest_structure.Metadata(incremental, size_of_disk)
with open(device, 'rb+') as f:
read_segments(f, segments_to_read, segment_size, o)
manifest.initialize()
manifest.put_metadata(metadata)
o.write_manifest()
segments_list = list(range(0, int(math.ceil(
float(size_of_disk)/metadata.segment_size))))
segments = read_segments(segments_list, metadata, backend)
manifest.put_segments(segments, metadata)
if __name__ == '__main__':
main()
sys.exit(main())

54
tools/dump_manifest.py Executable file
View File

@ -0,0 +1,54 @@
#!/usr/bin/python
# Copyright 2016 Sam Yaple
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copied and licensed from https://github.com/SamYaple/osdk
import argparse
import binascii
import os
import sys
from stevedore import driver
def parse_args():
    """Parse the command line options of the manifest dump tool.

    :returns: Namespace holding ``manifest`` and ``driver``
    """
    manifest_option = dict(required=True, help='manifest file')
    driver_option = dict(required=False, default='sqlite',
                         choices=['osdk', 'sqlite'], help='manifest driver')

    parser = argparse.ArgumentParser(description='Backup Block Device')
    parser.add_argument('--manifest', **manifest_option)
    parser.add_argument('--driver', **driver_option)
    return parser.parse_args()
def check_manifest(manifest_file):
    """Tell whether the manifest path refers to an existing regular file.

    :param manifest_file: Path of the manifest to look for
    :returns: True if the file exists, otherwise False
    """
    found = os.path.isfile(manifest_file)
    return found
def main():
    """Print the hex-encoded hash of every segment stored in a manifest."""
    args = parse_args()

    # Load the manifest driver selected on the command line and hand it
    # the manifest path as its only constructor argument.
    manager = driver.DriverManager(
        namespace='ekko.manifest.drivers',
        name=args.driver,
        invoke_on_load=True,
        invoke_args=[args.manifest]
    )
    manifest = manager.driver

    metadata = manifest.get_metadata()
    for segment in manifest.get_segments(metadata):
        hex_hash = binascii.b2a_hex(segment.segment_hash)
        print(hex_hash)
# Script entry point: run main() and pass its return value to sys.exit()
# as the process exit status.
if __name__ == '__main__':
    sys.exit(main())

View File

@ -69,9 +69,8 @@ def main():
).driver
size_of_disk = args.backupsize * 1024**3 # Convert GB to B
num_of_sectors = int(size_of_disk / 512)
incremental = 0
metadata = manifest_structure.Metadata(incremental, sectors=num_of_sectors)
metadata = manifest_structure.Metadata(incremental, size_of_disk)
manifest.initialize()
manifest.put_metadata(metadata)