Add initial backup code
Updates backup.py and adds dump_manifest.py that will print out all hashes from the underlying blocks. This can be replicated with a simple python script as follows: # dump_disk.py import hashlib with open('/dev/loop0', 'rb') as f: while True: data = f.read(4*1024**2) if not data: break print(hashlib.sha1(data).hexdigest()) python dump_disk.py | grep -v 2bccbd2f38f15c13eb7d5a89fd9d85f595e23bc3 NOTE: 2bccbd2f38f15c13eb7d5a89fd9d85f595e23bc3 == segment of all zero To perform backup and dump info from manifest (which will match the output of the above script): tools/backup.py --backup /dev/loop0 --manifest mani tools/dump_manifest.py --manifest mani The backup works on both files and block devices with the raw driver. Further drivers will be implemented (such as qcow2 and rbd) as time goes on. Adjust sectors to equal number of bytes on size instead. Change-Id: I976f02a27bc13b5774a6088799ca61f65ec04f14
This commit is contained in:
parent
51bfcd893e
commit
401c05413c
|
@ -0,0 +1,29 @@
|
|||
# Copyright 2016 Sam Yaple
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from ekko.backup import backend
|
||||
|
||||
|
||||
class RawBackup(backend.BaseBackup):
    """Backup backend that reads directly from a raw file or block device."""

    def get_data(self, reads):
        """Lazily read the requested extents from ``self.backup_location``.

        :param reads: iterable of ``(start, size)`` pairs; ``start`` is used
                      as an absolute ``seek`` position and ``size`` as the
                      ``read`` length
        :returns: generator of ``(start, data)`` tuples, one per request and
                  in request order
        """
        with open(self.backup_location, 'rb') as source:
            for offset, length in reads:
                # whence=0: position relative to the start of the file.
                source.seek(offset, 0)
                chunk = source.read(length)
                yield (offset, chunk)

    def get_size(self):
        """Return the end-of-file position of ``self.backup_location``."""
        with open(self.backup_location, 'rb') as source:
            # whence=2: position relative to the end of the file.
            source.seek(0, 2)
            end_offset = source.tell()
        return end_offset
|
|
@ -0,0 +1,45 @@
|
|||
# Copyright 2016 Sam Yaple
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import abc
|
||||
|
||||
import six
|
||||
|
||||
|
||||
@six.add_metaclass(abc.ABCMeta)
class BaseBackup(object):
    """Base class for Backup drivers

    Concrete drivers must implement ``get_data`` and ``get_size``.

    :param backup_location: Location of device or file to backup
    """

    def __init__(self, backup_location):
        # Only stored here; nothing is opened or validated at construction.
        self.backup_location = backup_location

    @abc.abstractmethod
    def get_data(self, reads):
        """Get data from backing device or file

        NOTE(review): this docstring used to say "start sector", but the raw
        driver seeks to the start value as a byte offset -- confirm the unit
        expected from other drivers.

        :param reads: A list of tuples with the start position and size of
                      each read
        :returns: An iterable of tuples with start position and data
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def get_size(self):
        """Get size of disk to backup

        :returns: Size of disk in bytes
        """
        raise NotImplementedError()
|
|
@ -48,7 +48,7 @@ class OSDKManifest(drivers.BaseManifest):
|
|||
'<2IQ24s',
|
||||
metadata.incremental,
|
||||
metadata.segment_size,
|
||||
metadata.sectors,
|
||||
metadata.size_of_disk,
|
||||
metadata.timestamp
|
||||
))
|
||||
for backupset in metadata.backupsets:
|
||||
|
|
|
@ -69,19 +69,26 @@ class SQLiteManifest(drivers.BaseManifest):
|
|||
|
||||
return structure.Metadata(
|
||||
incremental=metadata['incremental'],
|
||||
sectors=metadata['sectors'],
|
||||
size_of_disk=metadata['size_of_disk'],
|
||||
segment_size=metadata['segment_size'],
|
||||
timestamp=metadata['timestamp'],
|
||||
backupset_id=backupsets[-1],
|
||||
backupsets=backupsets
|
||||
)
|
||||
|
||||
def get_segments(self, metadata):
    """Yield every row of the ``segments`` table as a ``structure.Segment``.

    Columns are read positionally from ``SELECT *``.
    NOTE(review): this assumes the column order is (incremental, segment,
    compression, encryption, segment_hash, backupset index) -- confirm
    against the table definition.

    :param metadata: object whose ``backupsets`` sequence maps the stored
                     backupset index (column 5) to a backupset id
    :returns: generator of ``structure.Segment`` objects
    """
    with self.get_conn() as conn:
        with closing(conn.cursor()) as cur:
            cur.execute("SELECT * FROM segments")
            for row in cur:
                yield structure.Segment(
                    backupset_id=metadata.backupsets[row[5]],
                    incremental=row[0],
                    segment=row[1],
                    compression=row[2],
                    encryption=row[3],
                    # bytes() rather than str(): on Python 2 the two are
                    # the same type, while on Python 3 str() of a bytes
                    # value yields its repr ("b'...'") instead of the
                    # raw digest.
                    segment_hash=bytes(row[4])
                )
|
||||
|
||||
def put_segments(self, segments, metadata):
|
||||
with self.get_conn() as conn:
|
||||
|
@ -108,7 +115,7 @@ class SQLiteManifest(drivers.BaseManifest):
|
|||
[
|
||||
('incremental', metadata.incremental),
|
||||
('segment_size', metadata.segment_size),
|
||||
('sectors', metadata.sectors),
|
||||
('size_of_disk', metadata.size_of_disk),
|
||||
('timestamp', metadata.timestamp)
|
||||
]
|
||||
)
|
||||
|
|
|
@ -18,10 +18,10 @@ from uuid import uuid4 as uuid
|
|||
|
||||
class Metadata(object):
|
||||
|
||||
def __init__(self, incremental, sectors, segment_size=None,
|
||||
def __init__(self, incremental, size_of_disk, segment_size=None,
|
||||
timestamp=None, backupsets=None, backupset_id=None):
|
||||
self.timestamp = timestamp if timestamp else time.time()
|
||||
self.sectors = sectors
|
||||
self.size_of_disk = size_of_disk
|
||||
self.incremental = incremental
|
||||
self.segment_size = 4 * 1024 ** 2 # 4MiB
|
||||
self.backupset_id = backupset_id if backupset_id else uuid().bytes
|
||||
|
|
|
@ -20,6 +20,9 @@ classifier =
|
|||
Programming Language :: Python :: 3.4
|
||||
|
||||
[entry_points]
|
||||
ekko.backup.backend =
|
||||
raw = ekko.backup._backend.raw:RawBackup
|
||||
qcow2 = ekko.backup._backend.qcow2:QCOW2Backup
|
||||
ekko.manifest.drivers =
|
||||
osdk = ekko.manifest._drivers.osdk:OSDKManifest
|
||||
sqlite = ekko.manifest._drivers.sqlite:SQLiteManifest
|
||||
|
|
|
@ -17,78 +17,100 @@
|
|||
# Copied and licensed from https://github.com/SamYaple/osdk
|
||||
|
||||
|
||||
import argparse
|
||||
from hashlib import sha1
|
||||
from osdk import osdk
|
||||
from uuid import uuid4 as uuid
|
||||
import math
|
||||
import os
|
||||
import sys
|
||||
|
||||
from ekko.manifest import structure as manifest_structure
|
||||
from six.moves import range
|
||||
from stevedore import driver
|
||||
|
||||
|
||||
def get_disk_size(device):
    """Return the size of *device* by locating its end-of-file position.

    :param device: path of the file or block device to measure
    :returns: the offset of the end of the file, as reported by ``tell``
    """
    with open(device, 'rb') as f:
        # Do not rely on seek()'s return value: Python 2 file objects return
        # None from seek(), so the position is read back with tell().
        f.seek(0, 2)
        return f.tell()
|
||||
def parse_args():
    """Parse the command line of the backup tool.

    :returns: the ``argparse.Namespace`` with the ``backup``, ``manifest``,
              ``cbt``, ``backend`` and ``driver`` options
    """
    option_specs = (
        ('--backup', dict(required=True,
                          help='Path to backup file or device')),
        ('--manifest', dict(required=True,
                            help='manifest file')),
        ('--cbt', dict(required=False,
                       help='change block tracking info')),
        ('--backend', dict(required=False, default='raw',
                           choices=['raw'], help='backend driver')),
        ('--driver', dict(required=False, default='sqlite',
                          choices=['osdk', 'sqlite'],
                          help='manifest driver')),
    )

    parser = argparse.ArgumentParser(description='Backup Block Device')
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args()
|
||||
|
||||
|
||||
def read_segments(segments, metadata, backend):
    """Yield a manifest Segment for each requested segment that is not zero.

    :param segments: iterable of segment indexes to read (may be empty)
    :param metadata: object providing ``segment_size``, ``size_of_disk``,
                     ``backupset_id`` and ``incremental``
    :param backend: backup driver whose ``get_data(reads)`` yields
                    ``(start, data)`` tuples for ``(start, size)`` requests
    :returns: generator of ``manifest_structure.Segment`` objects
    """
    size = metadata.segment_size
    disk_size = metadata.size_of_disk

    # NOTE(SamYaple): If we are reading the last segment on the disk and the
    # normal read size would run past the end of the disk, shrink the read
    # size to the appropriate size.
    # The clamp is applied to every segment so it does not matter where in
    # the list the final disk segment appears, and an empty list is fine.
    reads = []
    for segment in segments:
        start = segment * size
        reads.append((start, max(0, min(size, disk_size - start))))

    # NOTE(SamYaple): One of the few optimizations we may ever need to do is
    # the dropping of segments that are 100% full of zero bytes. This can
    # potentially greatly reduce the size of a manifest but more importantly
    # it reduces the number of objects we will need to track
    # Built in memory rather than read from /dev/zero so no special device
    # file is required.
    zero_blob = b'\0' * size

    for start, data in backend.get_data(reads):
        # Only full-size blobs can compare equal, so a short tail segment is
        # always recorded.
        if data == zero_blob:
            continue
        yield manifest_structure.Segment(
            metadata.backupset_id,
            metadata.incremental,
            # Floor division keeps the segment index an integer on Python 3.
            start // size,
            # NOTE(review): the two zeros are presumably the compression and
            # encryption fields -- confirm against Segment's signature.
            0,
            0,
            sha1(data).digest()
        )
|
||||
|
||||
|
||||
def check_manifest(manifest_file):
    """Tell whether *manifest_file* is an existing regular file.

    :param manifest_file: path of the manifest to look for
    :returns: the result of ``os.path.isfile`` for that path
    """
    already_present = os.path.isfile(manifest_file)
    return already_present
|
||||
|
||||
|
||||
def main():
    """Back up the device or file named on the command line.

    Loads the manifest and backend drivers through stevedore, writes the
    metadata to a fresh manifest, then stores one entry per segment yielded
    by ``read_segments``.  Nothing is done when the manifest file already
    exists.
    """
    args = parse_args()
    if check_manifest(args.manifest):
        print('manifest exists; exiting')
        return

    def load_driver(namespace, name, location):
        # Instantiate the named plugin with ``location`` as its only argument.
        manager = driver.DriverManager(
            namespace=namespace,
            name=name,
            invoke_on_load=True,
            invoke_args=[location]
        )
        return manager.driver

    manifest = load_driver('ekko.manifest.drivers', args.driver,
                           args.manifest)
    backend = load_driver('ekko.backup.backend', args.backend, args.backup)

    size_of_disk = backend.get_size()
    incremental = 0
    metadata = manifest_structure.Metadata(incremental, size_of_disk)

    manifest.initialize()
    manifest.put_metadata(metadata)

    # Round up so a trailing partial segment is included.
    segment_count = int(math.ceil(
        float(size_of_disk) / metadata.segment_size))
    segments_list = list(range(0, segment_count))

    segments = read_segments(segments_list, metadata, backend)
    manifest.put_segments(segments, metadata)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
sys.exit(main())
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
# Copyright 2016 Sam Yaple
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Copied and licensed from https://github.com/SamYaple/osdk
|
||||
|
||||
|
||||
import argparse
|
||||
import binascii
|
||||
import os
|
||||
import sys
|
||||
|
||||
from stevedore import driver
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command line of the manifest dump tool.

    :returns: the ``argparse.Namespace`` with the ``manifest`` and
              ``driver`` options
    """
    manifest_option = dict(required=True, help='manifest file')
    driver_option = dict(required=False, default='sqlite',
                         choices=['osdk', 'sqlite'], help='manifest driver')

    parser = argparse.ArgumentParser(description='Backup Block Device')
    parser.add_argument('--manifest', **manifest_option)
    parser.add_argument('--driver', **driver_option)

    parsed = parser.parse_args()
    return parsed
|
||||
|
||||
|
||||
def check_manifest(manifest_file):
    """Report whether the manifest path points at an existing regular file.

    :param manifest_file: path of the manifest
    :returns: ``True`` or ``False`` as given by ``os.path.isfile``
    """
    is_regular_file = os.path.isfile(manifest_file)
    return is_regular_file
|
||||
|
||||
|
||||
def main():
    """Print the hex-encoded hash of every segment stored in a manifest.

    :returns: ``1`` when the manifest file does not exist, otherwise
              ``None`` (both are passed to ``sys.exit`` by the caller)
    """
    args = parse_args()

    # Fail cleanly instead of handing a missing path to the driver.
    if not check_manifest(args.manifest):
        sys.stderr.write('manifest does not exist; exiting\n')
        return 1

    manifest = driver.DriverManager(
        namespace='ekko.manifest.drivers',
        name=args.driver,
        invoke_on_load=True,
        invoke_args=[args.manifest]
    ).driver

    for segment in manifest.get_segments(manifest.get_metadata()):
        # b2a_hex returns bytes on Python 3; decode so the output is the
        # plain hex text rather than a b'...' repr.
        print(binascii.b2a_hex(segment.segment_hash).decode('ascii'))
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
|
@ -69,9 +69,8 @@ def main():
|
|||
).driver
|
||||
|
||||
size_of_disk = args.backupsize * 1024**3 # Convert GB to B
|
||||
num_of_sectors = int(size_of_disk / 512)
|
||||
incremental = 0
|
||||
metadata = manifest_structure.Metadata(incremental, sectors=num_of_sectors)
|
||||
metadata = manifest_structure.Metadata(incremental, size_of_disk)
|
||||
|
||||
manifest.initialize()
|
||||
manifest.put_metadata(metadata)
|
||||
|
|
Loading…
Reference in New Issue