Adjust the way backupsets are stored in manifest

While debugging the storage difference between SQLite and OSDK,
I discovered that the biggest difference was the backupset_id storage.

Converting this back to my original design of an index reference has
brought the two formats within ~5% of each other in size! Additionally,
this change creates a 40% reduction in the manifest size.

A 2^24-object backup will now take only ~500MB instead of ~800MB.

SQLite is still about 50% slower than OSDK, but since the two formats
are now about the same size, the slowdown isn't even worth mentioning.
The exact times are 55s for OSDK and 90s for SQLite at 2^24 objects.

Change-Id: If63380f312691e34c92f8a95fff5e724a4f5a465
This commit is contained in:
SamYaple 2016-01-10 01:58:50 +00:00
parent d31e2ebef5
commit 0f62c546d1
5 changed files with 25 additions and 12 deletions

View File

@ -37,5 +37,5 @@ class ManifestDriver(object):
def put_metadata(self, metadata):
raise NotImplementedError()
def put_segments(self, segments):
def put_segments(self, segments, metadata):
raise NotImplementedError()

View File

@ -29,17 +29,17 @@ class OSDKDriver(driver.ManifestDriver):
with open(self.manifest_file, 'wb', 4096) as f:
f.write(os.urandom(20))
def put_segments(self, segments):
def put_segments(self, segments, metadata):
with open(self.manifest_file, 'ab', 4096) as f:
for segment in segments:
f.write(pack(
'<16s2I2B20s',
segment.backupset_id,
'<2I2B20sI',
segment.incremental,
segment.segment,
segment.compression,
segment.encryption,
segment.segment_hash
segment.segment_hash,
metadata.backupsets.index(segment.backupset_id)
))
def put_metadata(self, metadata):
@ -51,3 +51,5 @@ class OSDKDriver(driver.ManifestDriver):
metadata.sectors,
metadata.timestamp
))
for backupset in metadata.backupsets:
f.write(pack('<16s', backupset))

View File

@ -30,13 +30,18 @@ class SQLiteDriver(driver.ManifestDriver):
key TEXT PRIMARY KEY,
value TEXT
);
CREATE TABLE backupsets (
id INTEGER PRIMARY KEY,
backupset_id BLOB
);
CREATE TABLE segments (
backupset_id BLOB,
incremental INTEGER,
segment INTEGER PRIMARY KEY,
compression TINYINT,
encryption TINYINT,
segment_hash BLOB
segment_hash BLOB,
backupset_id INTEGER,
FOREIGN KEY(backupset_id) REFERENCES backupsets(id)
);
""")
conn.commit()
@ -53,19 +58,19 @@ class SQLiteDriver(driver.ManifestDriver):
conn.rollback()
self.conn = conn
def put_segments(self, segments):
def put_segments(self, segments, metadata):
with self.get_conn() as conn:
with closing(conn.cursor()) as cur:
for segment in segments:
cur.execute(
"INSERT INTO segments VALUES (?, ?, ?, ?, ?, ?)",
(
buffer(segment.backupset_id),
segment.incremental,
segment.segment,
segment.compression,
segment.encryption,
buffer(segment.segment_hash)
buffer(segment.segment_hash),
metadata.backupsets.index(segment.backupset_id)
)
)
conn.commit()
@ -82,5 +87,10 @@ class SQLiteDriver(driver.ManifestDriver):
('timestamp', metadata.timestamp)
]
)
for i, v in enumerate(metadata.backupsets):
cur.execute(
"INSERT INTO backupsets VALUES (?, ?)",
(i, buffer(v))
)
conn.commit()

View File

@ -19,12 +19,13 @@ from uuid import uuid4 as uuid
class Metadata(object):
def __init__(self, incremental, sectors, segment_size=None,
timestamp=None, backupset_id=None):
timestamp=None, backupsets=None, backupset_id=None):
self.timestamp = timestamp if timestamp else time.time()
self.sectors = sectors
self.incremental = incremental
self.segment_size = 4 * 1024 ** 2 # 4MiB
self.backupset_id = backupset_id if backupset_id else uuid().bytes
self.backupsets = backupsets if backupsets else [self.backupset_id]
class Segment(object):

View File

@ -82,7 +82,7 @@ def main():
num_of_segments = int(size_of_disk / metadata.segment_size)
segments = read_segments(range(0, num_of_segments - 1), metadata)
manifest.put_segments(segments)
manifest.put_segments(segments, metadata)
if __name__ == '__main__':
sys.exit(main())