diff --git a/README.md b/README.md index fe31cd5..20ccbac 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,16 @@ shoebox binary data archiving library - supports uploading to object storage -Requires PyTables, which needs (on ubuntu): +There are ArchiveReaders and ArchiveWriters which are managed +by RollManager. The RollManager opens and closes Archivers as +needed. "As needed" is determined by which RollChecker that was +passed into the RollManager. Archive files can roll over based +on file size or elapsed time (for writing). For reading, archive +files are only rolled over when the EOF is reached. -sudo apt-get install libhdf5-serial-dev -sudo pip install numpy -sudo pip install numexpr -sudo pip install cython -sudo pip install tables +Roll Managers also take care of filename creation, compression +of completed archives and transfer of archive files to remote +storage locations. +TODO: How will the ReadingRollManager know which files to read +from, and in which order, if the filename is templated? diff --git a/shoebox/archive.py b/shoebox/archive.py new file mode 100644 index 0000000..5967789 --- /dev/null +++ b/shoebox/archive.py @@ -0,0 +1,50 @@ +# Copyright (c) 2014 Dark Secret Software Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class Archive(object): + def __init__(self, filename): + self._handle = None + self.filename = filename + + def get_file_handle(self): + return self._handle + + +class ArchiveWriter(Archive): + """The active Archive for appending. + """ + def __init__(self, filename): + super(ArchiveWriter, self).__init__(filename) + self._handle = open(filename, "wb+") + + def write(self, payload): + pass + + +class ArchiveReader(Archive): + """The active Archive for consuming. + """ + def __init__(self, filename): + super(ArchiveReader, self).__init__(filename) + + def read_block(self): + pass + + def read_header(self): + pass + + def read_payload(self): + pass diff --git a/shoebox/roll_checker.py b/shoebox/roll_checker.py new file mode 100644 index 0000000..cba8d15 --- /dev/null +++ b/shoebox/roll_checker.py @@ -0,0 +1,52 @@ +# Copyright (c) 2014 Dark Secret Software Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import utils + + +class RollChecker(object): + def start(self, archive): + """Called when a new archive is selected.""" + pass + + def check(self, archive): + """Should the current archive roll?""" + pass + + +class NeverRollChecker(RollChecker): + def check(self, archive): + return False + + +class TimeRollChecker(RollChecker): + def __init__(self, timedelta): + self.timedelta = timedelta + + def start(self, archive): + self.start_time = utils.now() + self.end_time = self.start_time + self.timedelta + + def check(self, archive): + return utils.now() >= self.end_time + + +class SizeRollChecker(RollChecker): + def __init__(self, size_in_gb): + self.size_in_gb = size_in_gb + + def check(self, archive): + size = archive._get_file_handle().tell() + return size / 1073741824 >= self.size_in_gb diff --git a/shoebox/roll_manager.py b/shoebox/roll_manager.py new file mode 100644 index 0000000..bd6dd95 --- /dev/null +++ b/shoebox/roll_manager.py @@ -0,0 +1,72 @@ +# Copyright (c) 2014 Dark Secret Software Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import archive +import utils + + +class RollManager(object): + def __init__(self, filename_template, roll_checker, directory="."): + self.filename_template = filename_template + self.roll_checker = roll_checker + self.directory = directory + self.active_archive = None + + def _make_filename(self): + f = utils.now().strftime(self.filename_template) + return f.replace(" ", "_") + + def get_active_archive(self): + if not self.active_archive: + filename = self._make_filename() + self.active_archive = self.archive_class(filename) + self.roll_checker.start(self.active_archive) + + return self.active_archive + + def _should_roll_archive(self): + return self.roll_checker.check(self.active_archive) + + def _roll_archive(self): + pass + + +class ReadingRollManager(RollManager): + def __init__(self, filename_template, roll_checker, directory="."): + super(ReadingRollManager, self).__init__(filename_template, + roll_checker, directory) + self.archive_class = archive.ArchiveReader + + def read_block(self): + pass + + def read_header(self): + pass + + def read_payload(self): + pass + + +class WritingRollManager(RollManager): + def __init__(self, filename_template, roll_checker, directory="."): + super(WritingRollManager, self).__init__(filename_template, + roll_checker, directory) + self.archive_class = archive.ArchiveWriter + + def write(self, payload): + a = self.get_active_archive() + a.write(payload) + if self._should_roll_archive(): + self._roll_archive() diff --git a/shoebox/shoebox.py b/shoebox/shoebox.py deleted file mode 100644 index b0bd69d..0000000 --- a/shoebox/shoebox.py +++ /dev/null @@ -1,192 +0,0 @@ -# Copyright (c) 2014 Dark Secret Software Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import datetime -import struct - -"""Binary data archiving library. - -Data is written in the following format: - -[ENTRY] = - byte # 0x05 - long # new versions get new block-types - ulong - long - -[PAYLOAD] = - byte # 0x06 - int - [RECORD, RECORD, ...] - -[RECORD] = - long - varstr (%s) - int - - - - - - - -There are ArchiveReaders and ArchiveWriters which are managed -by RollManager. The RollManager opens and closes Archivers as -needed. "As needed" is determined by which RollChecker that was -passed into the RollManager. Archive files can roll over based -on file size or elapsed time (for writing). For reading, archive -files are only rolled over when the EOF is reached. - -Roll Managers also take care of filename creation, compression -of completed archives and transfer of archive files to remote -storage locations. - -TODO: How will the ReadingRollManager know which files to read -from, and in which order, if the filename is templated? -""" - -def now(): - """Broken out for testing.""" - return datetime.datetime.utcnow() - - -class RollChecker(object): - def start(self, archive): - """Called when a new archive is selected.""" - pass - - def check(self, archive): - """Should the current archive roll?""" - pass - - -class NeverRollChecker(RollChecker): - def check(self, archive): - return False - - -class TimeRollChecker(RollChecker): - def __init__(self, timedelta): - self.timedelta = timedelta - - def start(self, archive): - self.start_time = now() - self.end_time = self.start_time + self.timedelta - - def check(self, archive): - return now() >= self.end_time - - -class SizeRollChecker(RollChecker): - def __init__(self, size_in_gb): - self.size_in_gb = size_in_gb - - def check(self, archive): - size = archive._get_file_handle().tell() - return size / 1073741824 >= self.size_in_gb - - -class RollManager(object): - def __init__(self, filename_template, roll_checker, directory="."): - self.filename_template = filename_template - self.roll_checker = roll_checker - self.directory = directory - self.active_archive = None - - def _make_filename(self): - f = now().strftime(self.filename_template) - return f.replace(" ", "_") - - def get_active_archive(self): - if not self.active_archive: - filename = self._make_filename() - self.active_archive = self.archive_class(filename) - self.roll_checker.start(self.active_archive) - - return self.active_archive - - def _should_roll_archive(self): - return self.roll_checker.check(self.active_archive) - - def _roll_archive(self): - pass - - -class ReadingRollManager(RollManager): - - def __init__(self, filename_template, roll_checker, directory="."): - super(ReadingRollManager, self).__init__(filename_template, - roll_checker, directory) - self.archive_class = ArchiveReader - - def read_block(self): - pass - - def read_header(self): - pass - - def read_payload(self): - pass - - -class WritingRollManager(RollManager): - def __init__(self, filename_template, roll_checker, directory="."): - super(WritingRollManager, self).__init__(filename_template, - roll_checker, directory) - self.archive_class = ArchiveWriter - - def write(self, payload): - a = self.get_active_archive() - a.write(payload) - if self._should_roll_archive(): - self._roll_archive() - - -class Archive(object): - def __init__(self, filename): - self._handle = None - self.filename = filename - - def get_file_handle(self): - return self._handle - - -class ArchiveWriter(Archive): - """The active Archive for appending. - """ - def __init__(self, filename): - super(ArchiveWriter, self).__init__(filename) - self._handle = open(filename, "wb+") - - def write(self, payload): - - pass - - -class ArchiveReader(Archive): - """The active Archive for consuming. - """ - def __init__(self, filename): - super(ArchiveReader, self).__init__(filename) - - def read_block(self): - pass - - def read_header(self): - pass - - def read_payload(self): - pass - diff --git a/shoebox/utils.py b/shoebox/utils.py new file mode 100644 index 0000000..b6b72ae --- /dev/null +++ b/shoebox/utils.py @@ -0,0 +1,21 @@ +# Copyright (c) 2014 Dark Secret Software Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + + +def now(): + """Broken out for testing.""" + return datetime.datetime.utcnow() diff --git a/test/test_shoebox.py b/test/test_shoebox.py deleted file mode 100644 index 0999ec9..0000000 --- a/test/test_shoebox.py +++ /dev/null @@ -1,95 +0,0 @@ -import datetime -import mock -import unittest - -from shoebox import shoebox - - -class TestRollChecker(unittest.TestCase): - def test_time_roll_checker_start(self): - one_hour = datetime.timedelta(hours=1) - x = shoebox.TimeRollChecker(one_hour) - now = datetime.datetime.utcnow() - with mock.patch.object(shoebox, 'now') as dt: - dt.return_value = now - x.start(None) - self.assertEqual(x.start_time, now) - self.assertEqual(x.end_time, now + one_hour) - - def test_time_roll_checker_end(self): - one_hour = datetime.timedelta(hours=1) - x = shoebox.TimeRollChecker(one_hour) - now = datetime.datetime.utcnow() - x.start_time = now - x.end_time = now + one_hour - with mock.patch.object(shoebox, 'now') as dt: - dt.return_value = now + one_hour - self.assertTrue(x.check(None)) - - with mock.patch.object(shoebox, 'now') as dt: - dt.return_value = now - self.assertFalse(x.check(None)) - - with mock.patch.object(shoebox, 'now') as dt: - dt.return_value = now + one_hour - datetime.timedelta(seconds = 1) - self.assertFalse(x.check(None)) - - def test_size_roll_checker_end(self): - one_gig = 1073741824 - x = shoebox.SizeRollChecker(10) - - archive = mock.Mock() - archive._get_file_handle.return_value.tell.return_value = one_gig * 5 - self.assertFalse(x.check(archive)) - - archive._get_file_handle.return_value.tell.return_value = one_gig * 10 - self.assertTrue(x.check(archive)) - - archive._get_file_handle.return_value.tell.return_value = one_gig * 11 - self.assertTrue(x.check(archive)) - - -class TestRollManager(unittest.TestCase): - def test_make_filename(self): - filename_template = "filename_%c" - now = datetime.datetime(day=1, month=2, year=2014, - hour=10, minute=11, second=12) - x = shoebox.RollManager("filename_%c.dat", None) - - with mock.patch.object(shoebox, "now") as dt: - dt.return_value = now - filename = x._make_filename() - self.assertEqual(filename, "filename_Sat_Feb__1_10:11:12_2014.dat") - -class TestWritingRollManager(unittest.TestCase): - def test_get_active_archive(self): - roll_checker = mock.Mock() - filename_template = "filename_%c.dat" - x = shoebox.WritingRollManager(filename_template, roll_checker) - archive = x.get_active_archive() - self.assertTrue(isinstance(archive, shoebox.ArchiveWriter)) - self.assertTrue(roll_checker.start.called) - - def test_write_always_roll(self): - roll_checker = mock.Mock() - roll_checker.check.return_value = True - x = shoebox.WritingRollManager("template", roll_checker) - with mock.patch.object(x, "_roll_archive") as ra: - x.write("payload") - self.assertTrue(ra.called) - - def test_write_never_roll(self): - roll_checker = mock.Mock() - roll_checker.check.return_value = False - x = shoebox.WritingRollManager("template", roll_checker) - with mock.patch.object(x, "_roll_archive") as ra: - x.write("payload") - self.assertFalse(ra.called) - -class TestWriting(unittest.TestCase): - def test_write(self): - roll_checker = shoebox.NeverRollChecker() - x = shoebox.WritingRollManager("template_%s", roll_checker) - - for index in range(10): - x.write("payload_%d" % index)